{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pima Indians Diabetes Data Set数据探索\n",
    "\n",
    "数据说明：\n",
    "Pima Indians Diabetes Data Set（皮马印第安人糖尿病数据集） 根据现有的医疗信息预测5年内皮马印第安人糖尿病发作的概率。   \n",
    "\n",
    "数据集共9个字段: \n",
    "0列为怀孕次数；\n",
    "1列为口服葡萄糖耐量试验中2小时后的血浆葡萄糖浓度；\n",
    "2列为舒张压（单位:mm Hg）\n",
    "3列为三头肌皮褶厚度（单位：mm）\n",
    "4列为餐后血清胰岛素（单位:mu U/ml）\n",
    "5列为体重指数（体重（公斤）/ 身高（米）^2）\n",
    "6列为糖尿病家系作用\n",
    "7列为年龄\n",
    "8列为分类变量（0或1）\n",
    "\n",
    "数据链接：https://archive.ics.uci.edu/ml/datasets/Pima+Indians+Diabetes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "import必要的工具包，用于文件读取／特征编码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "数据文件路径和文件名"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pregnants</th>\n",
       "      <th>Plasma_glucose_concentration</th>\n",
       "      <th>blood_pressure</th>\n",
       "      <th>Triceps_skin_fold_thickness</th>\n",
       "      <th>serum_insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>Diabetes_pedigree_function</th>\n",
       "      <th>Age</th>\n",
       "      <th>Target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>6</td>\n",
       "      <td>148</td>\n",
       "      <td>72</td>\n",
       "      <td>35</td>\n",
       "      <td>0</td>\n",
       "      <td>33.6</td>\n",
       "      <td>0.627</td>\n",
       "      <td>50</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>85</td>\n",
       "      <td>66</td>\n",
       "      <td>29</td>\n",
       "      <td>0</td>\n",
       "      <td>26.6</td>\n",
       "      <td>0.351</td>\n",
       "      <td>31</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8</td>\n",
       "      <td>183</td>\n",
       "      <td>64</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>23.3</td>\n",
       "      <td>0.672</td>\n",
       "      <td>32</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>89</td>\n",
       "      <td>66</td>\n",
       "      <td>23</td>\n",
       "      <td>94</td>\n",
       "      <td>28.1</td>\n",
       "      <td>0.167</td>\n",
       "      <td>21</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>137</td>\n",
       "      <td>40</td>\n",
       "      <td>35</td>\n",
       "      <td>168</td>\n",
       "      <td>43.1</td>\n",
       "      <td>2.288</td>\n",
       "      <td>33</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pregnants  Plasma_glucose_concentration  blood_pressure  \\\n",
       "0          6                           148              72   \n",
       "1          1                            85              66   \n",
       "2          8                           183              64   \n",
       "3          1                            89              66   \n",
       "4          0                           137              40   \n",
       "\n",
       "   Triceps_skin_fold_thickness  serum_insulin   BMI  \\\n",
       "0                           35              0  33.6   \n",
       "1                           29              0  26.6   \n",
       "2                            0              0  23.3   \n",
       "3                           23             94  28.1   \n",
       "4                           35            168  43.1   \n",
       "\n",
       "   Diabetes_pedigree_function  Age  Target  \n",
       "0                       0.627   50       1  \n",
       "1                       0.351   31       0  \n",
       "2                       0.672   32       1  \n",
       "3                       0.167   21       0  \n",
       "4                       2.288   33       1  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the Pima Indians Diabetes CSV (path is relative to the notebook's working directory)\n",
    "# and preview its first rows.\n",
    "train = pd.read_csv(\"pima-indians-diabetes.csv\")\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 768 entries, 0 to 767\n",
      "Data columns (total 9 columns):\n",
      "pregnants                       768 non-null int64\n",
      "Plasma_glucose_concentration    768 non-null int64\n",
      "blood_pressure                  768 non-null int64\n",
      "Triceps_skin_fold_thickness     768 non-null int64\n",
      "serum_insulin                   768 non-null int64\n",
      "BMI                             768 non-null float64\n",
      "Diabetes_pedigree_function      768 non-null float64\n",
      "Age                             768 non-null int64\n",
      "Target                          768 non-null int64\n",
      "dtypes: float64(2), int64(7)\n",
      "memory usage: 54.1 KB\n"
     ]
    }
   ],
   "source": [
    "# Summarize column dtypes and non-null counts.\n",
    "train.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "粗看数据集没有缺失值，\n",
    "但该数据集已知存在缺失值，某些列中存在的缺失值被标记为0。通过这些列中指标的定义和相应领域的常识可以证实上述观点，譬如体重指数和血压两列中的0作为指标数值来说是无意义的。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pregnants</th>\n",
       "      <th>Plasma_glucose_concentration</th>\n",
       "      <th>blood_pressure</th>\n",
       "      <th>Triceps_skin_fold_thickness</th>\n",
       "      <th>serum_insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>Diabetes_pedigree_function</th>\n",
       "      <th>Age</th>\n",
       "      <th>Target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>3.845052</td>\n",
       "      <td>120.894531</td>\n",
       "      <td>69.105469</td>\n",
       "      <td>20.536458</td>\n",
       "      <td>79.799479</td>\n",
       "      <td>31.992578</td>\n",
       "      <td>0.471876</td>\n",
       "      <td>33.240885</td>\n",
       "      <td>0.348958</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>3.369578</td>\n",
       "      <td>31.972618</td>\n",
       "      <td>19.355807</td>\n",
       "      <td>15.952218</td>\n",
       "      <td>115.244002</td>\n",
       "      <td>7.884160</td>\n",
       "      <td>0.331329</td>\n",
       "      <td>11.760232</td>\n",
       "      <td>0.476951</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.078000</td>\n",
       "      <td>21.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>99.000000</td>\n",
       "      <td>62.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>27.300000</td>\n",
       "      <td>0.243750</td>\n",
       "      <td>24.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>3.000000</td>\n",
       "      <td>117.000000</td>\n",
       "      <td>72.000000</td>\n",
       "      <td>23.000000</td>\n",
       "      <td>30.500000</td>\n",
       "      <td>32.000000</td>\n",
       "      <td>0.372500</td>\n",
       "      <td>29.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>6.000000</td>\n",
       "      <td>140.250000</td>\n",
       "      <td>80.000000</td>\n",
       "      <td>32.000000</td>\n",
       "      <td>127.250000</td>\n",
       "      <td>36.600000</td>\n",
       "      <td>0.626250</td>\n",
       "      <td>41.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>17.000000</td>\n",
       "      <td>199.000000</td>\n",
       "      <td>122.000000</td>\n",
       "      <td>99.000000</td>\n",
       "      <td>846.000000</td>\n",
       "      <td>67.100000</td>\n",
       "      <td>2.420000</td>\n",
       "      <td>81.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        pregnants  Plasma_glucose_concentration  blood_pressure  \\\n",
       "count  768.000000                    768.000000      768.000000   \n",
       "mean     3.845052                    120.894531       69.105469   \n",
       "std      3.369578                     31.972618       19.355807   \n",
       "min      0.000000                      0.000000        0.000000   \n",
       "25%      1.000000                     99.000000       62.000000   \n",
       "50%      3.000000                    117.000000       72.000000   \n",
       "75%      6.000000                    140.250000       80.000000   \n",
       "max     17.000000                    199.000000      122.000000   \n",
       "\n",
       "       Triceps_skin_fold_thickness  serum_insulin         BMI  \\\n",
       "count                   768.000000     768.000000  768.000000   \n",
       "mean                     20.536458      79.799479   31.992578   \n",
       "std                      15.952218     115.244002    7.884160   \n",
       "min                       0.000000       0.000000    0.000000   \n",
       "25%                       0.000000       0.000000   27.300000   \n",
       "50%                      23.000000      30.500000   32.000000   \n",
       "75%                      32.000000     127.250000   36.600000   \n",
       "max                      99.000000     846.000000   67.100000   \n",
       "\n",
       "       Diabetes_pedigree_function         Age      Target  \n",
       "count                  768.000000  768.000000  768.000000  \n",
       "mean                     0.471876   33.240885    0.348958  \n",
       "std                      0.331329   11.760232    0.476951  \n",
       "min                      0.078000   21.000000    0.000000  \n",
       "25%                      0.243750   24.000000    0.000000  \n",
       "50%                      0.372500   29.000000    0.000000  \n",
       "75%                      0.626250   41.000000    1.000000  \n",
       "max                      2.420000   81.000000    1.000000  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Basic descriptive statistics for the numeric features\n",
    "train.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "从结果中我们可以看到很多列的最小值为0。而在一些特定列代表的变量中，0值并没有意义，这就表明该值无效或为缺失值。\n",
    "\n",
    "具体来说，下列变量的最小值为0时数据无意义：\n",
    "1、血浆葡萄糖浓度\n",
    "2、舒张压\n",
    "3、肱三头肌皮褶厚度\n",
    "4、餐后血清胰岛素\n",
    "5、体重指数\n",
    "\n",
    "在Pandas的DataFrame中，通过replace()函数可以很方便地将我们感兴趣的数据子集的值标记为NaN。\n",
    "\n",
    "标记完缺失值之后，可以利用isnull()函数将数据集中所有的NaN值标记为True，然后就可以得到每一列中缺失值的数量了。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "pregnants                         0\n",
      "Plasma_glucose_concentration      5\n",
      "blood_pressure                   35\n",
      "Triceps_skin_fold_thickness     227\n",
      "serum_insulin                   374\n",
      "BMI                              11\n",
      "Diabetes_pedigree_function        0\n",
      "Age                               0\n",
      "Target                            0\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# Columns in which a recorded 0 is not a meaningful measurement and is treated as missing\n",
    "NaN_col_names = ['Plasma_glucose_concentration','blood_pressure','Triceps_skin_fold_thickness','serum_insulin','BMI']\n",
    "# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0\n",
    "train[NaN_col_names] = train[NaN_col_names].replace(0, np.nan)\n",
    "# Number of missing values per column\n",
    "print(train.isnull().sum())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 对缺失值较多的特征，新增一个特征，表示这个特征是否缺失"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Triceps_skin_fold_thickness</th>\n",
       "      <th>Triceps_skin_fold_thickness_Missing</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>35.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>29.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>23.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>35.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>32.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>45.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Triceps_skin_fold_thickness  Triceps_skin_fold_thickness_Missing\n",
       "0                         35.0                                    0\n",
       "1                         29.0                                    0\n",
       "2                          NaN                                    1\n",
       "3                         23.0                                    0\n",
       "4                         35.0                                    0\n",
       "5                          NaN                                    1\n",
       "6                         32.0                                    0\n",
       "7                          NaN                                    1\n",
       "8                         45.0                                    0\n",
       "9                          NaN                                    1"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Many values are missing, so add a 0/1 indicator column recording whether the measurement is missing\n",
    "# (vectorized isnull() instead of a per-row Python lambda; int64 matches the dtype apply() produced)\n",
    "train['Triceps_skin_fold_thickness_Missing'] = train['Triceps_skin_fold_thickness'].isnull().astype('int64')\n",
    "train[['Triceps_skin_fold_thickness','Triceps_skin_fold_thickness_Missing']].head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x26ee3032a58>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAELCAYAAADDZxFQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAGnpJREFUeJzt3XuQFeWd//H3Ry6CEUVgdJEBId42EJdBRzRGE2+/H4Ssikm0YJP1HkwVScwmsjGplCLRqiSbROOa6GK8YGJQ10RF10vUqKzZKCIiAi4lRn8yooKorC6iMn5/f/QzchyfGQ44PWdgPq+qU3P66ae7v+cMnM883X26FRGYmZm1tl2tCzAzs67JAWFmZlkOCDMzy3JAmJlZlgPCzMyyHBBmZpblgDAzsywHhJmZZTkgzMwsq2etC/goBg0aFMOHD691GWZmW5XHHnvslYio21S/rToghg8fzvz582tdhpnZVkXS/6umn3cxmZlZlgPCzMyyHBBmZpa1VR+DMDOrlXfffZempibWr19f61La1KdPH+rr6+nVq9cWLe+AMDPbAk1NTfTr14/hw4cjqdblfEhEsGbNGpqamhgxYsQWrcO7mMzMtsD69esZOHBglwwHAEkMHDjwI41wHBBmZluoq4ZDi49anwPCzMyyfAzCzKwDrFmzhqOOOgqAl156iR49elBXV3xZed68efTu3bvDt7lgwQJWrVrF+PHjO3zd4IDggGnX1rqELuOxfzmp1iWYbbUGDhzIwoULAZg+fTo77rgjZ599dtXLNzc306NHj83a5oIFC1i8eHFpAeFdTGZmJTvmmGM44IADGDVqFL/+9a8B2LBhA/379+cHP/gBY8eOZd68ecyZM4d9992Xww47jG984xtMnDgRgDfffJNTTjmFsWPHMmbMGG677TbeeustZsyYwXXXXUdDQwM33XRTh9dd2ghCUh9gLrB92s5NEXGepGuAzwJrU9dTImKhiqMpvwAmAOtS+4Ky6jMz6yyzZs1iwIABrFu3jsbGRr74xS/Sr18/1q5dy/77788FF1zAunXr2Gefffjzn//MsGHDOPHEE99ffsaMGYwfP55rrrmG1157jYMOOohFixZx7rnnsnjxYi6++OJS6i5zBPE2cGREjAYagPGSDk7zpkVEQ3osTG2fA/ZOjynAZSXWZmbWaS666CJGjx7Npz71KZqamnjmmWcA6N27N8cffzwAS5cuZd9992WPPfZAEpMnT35/+T/+8Y9ceOGFNDQ0cMQRR7B+/Xqef/750usubQQREQG8mSZ7pUe0s8hxwLVpuYcl9Zc0OCJeLKtGM7Oy3XvvvcydO5eHH36Yvn37cuihh77/3YS+ffu+fypq8dGXFxHccsst7Lnnnh9onzt3bnmFU/IxCEk9JC0EVgH3RMQjadaFkhZJukjS9qltCLCiYvGm1GZmttVau3YtAwYMoG/fvixZsoRHH30022/UqFEsW7aMFStWEBHccMMN788bN24cl1xyyfvTjz/+OAD9+vXjjTfeKK32UgMiIpojogGoB8ZK+iTwPeBvgQOBAcB3U/fcNzo+FKmSpkiaL2n+6tWrS6rczKxjfP7zn2fdunWMHj2aGTNmcNBBB2X77bDDDlx66aUcffTRHHbYYey+++7svPPOAJx33nmsW7eO/fbbj1GjRjF9+nQAjjzySJ544gnGjBmzdR2krhQRr0t6ABgfET9NzW9LuhpoOQ+sCRhasVg9sDKzrpnATIDGxsb2dlmZmdVEywc4FBfMu/vuu7P9Xn/99Q9MH3300SxbtoyI4Mwzz6SxsRGAj33sY1xxxRUfWr6urq7Um6aVNoKQVCepf3reFzga+G9Jg1ObgInA4rTIHOAkFQ4G1vr4g5l1J5dddhkNDQ2MHDmSt956i69+9as1rafMEcRgYJakHhRBdGNE3C7pT5LqKHYpLQS+lvrfQXGK63KK01xP
LbE2M7MuZ9q0aUybNq3WZbyvzLOYFgFjMu1HttE/gKll1WNmZpvH36Q2M7MsB4SZmWU5IMzMLKvbX83VzKwjdPSVoau5uvJdd93FWWedRXNzM2eccQbnnHNOh9bgEYSZ2VaoubmZqVOncuedd7J06VJmz57N0qVLO3QbDggzs63QvHnz2Guvvfj4xz9O7969mTRpErfeemuHbsMBYWa2FXrhhRcYOnTjxSfq6+t54YUXOnQbDggzs61Q7uqvLVeG7SgOCDOzrVB9fT0rVmy8AHZTUxO77757h27DAWFmthU68MADefrpp3n22Wd55513uP766zn22GM7dBs+zdXMrANUc1pqR+rZsyeXXnop48aNo7m5mdNOO41Ro0Z17DY6dG1mZtZpJkyYwIQJE0pbv3cxmZlZlgPCzMyyHBBmZpblgDAzsywHhJmZZTkgzMwsy6e5mpl1gOdn7Neh6xt27pOb7HPaaadx++23s+uuu7J48eIO3T54BGFmttU65ZRTuOuuu0pbf2kBIamPpHmSnpC0RNL5qX2EpEckPS3pBkm9U/v2aXp5mj+8rNrMzLYFn/nMZxgwYEBp6y9zBPE2cGREjAYagPGSDgZ+DFwUEXsDrwGnp/6nA69FxF7ARamfmZnVSGkBEYU302Sv9AjgSOCm1D4LmJieH5emSfOPUkdfu9bMzKpW6jEIST0kLQRWAfcAzwCvR8SG1KUJGJKeDwFWAKT5a4GBZdZnZmZtKzUgIqI5IhqAemAs8Ilct/QzN1r40B0xJE2RNF/S/NWrV3dcsWZm9gGdcpprRLwu6QHgYKC/pJ5plFAPrEzdmoChQJOknsDOwKuZdc0EZgI0NjZ++JZKZmY1UM1pqR1t8uTJPPDAA7zyyivU19dz/vnnc/rpp296wSqVFhCS6oB3Uzj0BY6mOPB8P/Al4HrgZKDlLttz0vRf0vw/Re6eemZmBsDs2bNLXX+ZI4jBwCxJPSh2Zd0YEbdLWgpcL+kC4HHgytT/SuA3kpZTjBwmlVibmZltQmkBERGLgDGZ9r9SHI9o3b4eOKGseszMbPP4m9RmZluoq+8F/6j1OSDMzLZAnz59WLNmTZcNiYhgzZo19OnTZ4vX4Yv1mZltgfr6epqamujKp9v36dOH+vr6LV7eAWFmtgV69erFiBEjal1GqbyLyczMshwQZmaW5YAwM7MsB4SZmWU5IMzMLMsBYWZmWQ4IMzPLckCYmVmWA8LMzLIcEGZmluWAMDOzLAeEmZllOSDMzCzLAWFmZlkOCDMzy3JAmJlZVmkBIWmopPslPSVpiaSzUvt0SS9IWpgeEyqW+Z6k5ZKWSRpXVm1mZrZpZd5RbgPwnYhYIKkf8Jike9K8iyLip5WdJY0EJgGjgN2BeyXtExHNJdZoZmZtKG0EEREvRsSC9PwN4ClgSDuLHAdcHxFvR8SzwHJgbFn1mZlZ+zrlGISk4cAY4JHU9HVJiyRdJWmX1DYEWFGxWBOZQJE0RdJ8SfO78s3Czcy2dqUHhKQdgd8D34qI/wEuA/YEGoAXgZ+1dM0sHh9qiJgZEY0R0VhXV1dS1WZmVmpASOpFEQ7XRcQfACLi5Yhojoj3gCvYuBupCRhasXg9sLLM+szMrG1lnsUk4ErgqYj4eUX74IpuxwOL0/M5wCRJ20saAewNzCurPjMza1+ZZzF9GvhH4ElJC1Pb94HJkhoodh89B5wJEBFLJN0ILKU4A2qqz2AyM6ud0gIiIh4if1zhjnaWuRC4sKyazMysev4mtZmZZTkgzMwsywFhZmZZDggzM8tyQJiZWZYDwszMshwQZmaW5YAwM7MsB4SZmWU5IMzMLMsBYWZmWQ4IMzPLckCYmVmWA8LMzLIcEGZmluWAMDOzLAeEmZllVRUQku6rps3MzLYd7d5yVFIfYAdgkKRd2HgL0Z2A3UuuzczMamhTI4gzgceAv00/Wx63Ar9sb0FJQyXdL+kpSUsknZXaB0i6R9LT6ecuqV2SLpG0
XNIiSft/1BdnZmZbrt2AiIhfRMQI4OyI+HhEjEiP0RFx6SbWvQH4TkR8AjgYmCppJHAOcF9E7A3cl6YBPgfsnR5TgMu2/GWZmdlH1e4uphYR8a+SDgGGVy4TEde2s8yLwIvp+RuSngKGAMcBh6dus4AHgO+m9msjIoCHJfWXNDitx8zMOllVASHpN8CewEKgOTUH0GZAtFp+ODAGeATYreVDPyJelLRr6jYEWFGxWFNqc0CYmdVAVQEBNAIj01/3m0XSjsDvgW9FxP9IarNrpu1D25M0hWIXFMOGDdvccszMrErVfg9iMfA3m7tySb0owuG6iPhDan5Z0uA0fzCwKrU3AUMrFq8HVrZeZ0TMjIjGiGisq6vb3JLMzKxK1QbEIGCppLslzWl5tLeAiqHClcBTEfHzillzgJPT85MpzohqaT8pnc10MLDWxx/MzGqn2l1M07dg3Z8G/hF4UtLC1PZ94EfAjZJOB54HTkjz7gAmAMuBdcCpW7BNMzPrINWexfTg5q44Ih4if1wB4KhM/wCmbu52zMysHNWexfQGGw8Y9wZ6Af8bETuVVZiZmdVWtSOIfpXTkiYCY0upyMzMuoQtupprRNwCHNnBtZiZWRdS7S6mL1RMbkfxvYjN/k6EmZltPao9i+mYiucbgOcoLo1hZmbbqGqPQfiUUzOzbqbaGwbVS7pZ0ipJL0v6vaT6soszM7PaqXYX09XA79j4pbavpLb/U0ZRVhvPz9iv1iV0GcPOfbLWJZjVXLVnMdVFxNURsSE9rgF8ISQzs21YtQHxiqSvSOqRHl8B1pRZmJmZ1Va1AXEacCLwEsX9Gb6Er5VkZrZNq/YYxA+BkyPiNSjuKw38lCI4zMxsG1TtCOLvWsIBICJepbhDnJmZbaOqDYjtJO3SMpFGENWOPszMbCtU7Yf8z4D/knQTxSU2TgQuLK0qMzOruWq/SX2tpPkUF+gT8IWIWFpqZWZmVlNV7yZKgeBQMDPrJrboct9mZrbtc0CYmVmWA8LMzLJKCwhJV6Wrvy6uaJsu6QVJC9NjQsW870laLmmZpHFl1WVmZtUpcwRxDTA+035RRDSkxx0AkkYCk4BRaZlfSepRYm1mZrYJpQVERMwFXq2y+3HA9RHxdkQ8CywHxpZVm5mZbVotjkF8XdKitAuq5dvZQ4AVFX2aUpuZmdVIZwfEZcCeQAPFVWF/ltqV6Ru5FUiaImm+pPmrV68up0ozM+vcgIiIlyOiOSLeA65g426kJmBoRdd6YGUb65gZEY0R0VhX53sWmZmVpVMDQtLgisnjgZYznOYAkyRtL2kEsDcwrzNrMzOzDyrtiqySZgOHA4MkNQHnAYdLaqDYffQccCZARCyRdCPFpTw2AFMjorms2szMbNNKC4iImJxpvrKd/hfiK8SamXUZ/ia1mZll+aY/Zl3UAdOurXUJXcZj/3JSrUvoljyCMDOzLAeEmZllOSDMzCzLAWFmZlkOCDMzy3JAmJlZlgPCzMyyHBBmZpblgDAzsywHhJmZZTkgzMwsywFhZmZZDggzM8tyQJiZWZYDwszMshwQZmaW5YAwM7MsB4SZmWWVFhCSrpK0StLiirYBku6R9HT6uUtql6RLJC2XtEjS/mXVZWZm1SlzBHENML5V2znAfRGxN3Bfmgb4HLB3ekwBLiuxLjMzq0JpARERc4FXWzUfB8xKz2cBEyvar43Cw0B/SYPLqs3MzDats49B7BYRLwKkn7um9iHAiop+TanNzMxqpKscpFamLbIdpSmS5kuav3r16pLLMjPrvjo7IF5u2XWUfq5K7U3A0Ip+9cDK3AoiYmZENEZEY11dXanFmpl1Z50dEHOAk9Pzk4FbK9pPSmczHQysbdkVZWZmtdGzrBVLmg0cDgyS1AScB/wIuFHS6cDzwAmp+x3ABGA5sA44tay6zMysOqUFRERMbmPWUZm+AUwtqxYzM9t8XeUgtZmZdTGljSDMzDrK8zP2q3UJXcawc5/stG15BGFm
ZlkOCDMzy3JAmJlZlgPCzMyyHBBmZpblgDAzsywHhJmZZTkgzMwsywFhZmZZDggzM8tyQJiZWZYDwszMshwQZmaW5YAwM7MsB4SZmWU5IMzMLMsBYWZmWTW5o5yk54A3gGZgQ0Q0ShoA3AAMB54DToyI12pRn5mZ1XYEcURENEREY5o+B7gvIvYG7kvTZmZWI11pF9NxwKz0fBYwsYa1mJl1e7UKiAD+KOkxSVNS224R8SJA+rlrjWozMzNqdAwC+HRErJS0K3CPpP+udsEUKFMAhg0bVlZ9ZmbdXk1GEBGxMv1cBdwMjAVeljQYIP1c1cayMyOiMSIa6+rqOqtkM7Nup9MDQtLHJPVreQ78X2AxMAc4OXU7Gbi1s2szM7ONarGLaTfgZkkt2/9dRNwl6VHgRkmnA88DJ9SgNjMzSzo9ICLir8DoTPsa4KjOrsfMzPK60mmuZmbWhTggzMwsywFhZmZZDggzM8tyQJiZWZYDwszMshwQZmaW5YAwM7MsB4SZmWU5IMzMLMsBYWZmWQ4IMzPLckCYmVmWA8LMzLIcEGZmluWAMDOzLAeEmZllOSDMzCzLAWFmZlkOCDMzy+pyASFpvKRlkpZLOqfW9ZiZdVddKiAk9QB+CXwOGAlMljSytlWZmXVPXSoggLHA8oj4a0S8A1wPHFfjmszMuqWuFhBDgBUV002pzczMOlnPWhfQijJt8YEO0hRgSpp8U9Ky0qvqJvaAQcArta6jSzgv90/RasX/Nit0zL/NParp1NUCogkYWjFdD6ys7BARM4GZnVlUdyFpfkQ01roOs9b8b7M2utoupkeBvSWNkNQbmATMqXFNZmbdUpcaQUTEBklfB+4GegBXRcSSGpdlZtYtdamAAIiIO4A7al1HN+Vdd9ZV+d9mDSgiNt3LzMy6na52DMLMzLoIB4T58ibWZUm6StIqSYtrXUt35IDo5nx5E+virgHG17qI7soBYb68iXVZETEXeLXWdXRXDgjz5U3MLMsBYZu8vImZdU8OCNvk5U3MrHtyQJgvb2JmWQ6Ibi4iNgAtlzd5CrjRlzexrkLSbOAvwL6SmiSdXuuauhN/k9rMzLI8gjAzsywHhJmZZTkgzMwsywFhZmZZDggzM8tyQJiZWZYDopuRNFDSwvR4SdILFdO9W/W9W1K/WtXamqSHJDVk2reoTkkjJT0h6XFJw9vo01PS623M+62kie2s/9uS+lSxnqmSvtzOeo6WdEt7r6UzSDpDUkj6bEXbCaltYpq+WtK+m7ne4yVN6+h67aPrcrcctXJFxBqgAUDSdODNiPhpZR9JoviOzLjOr3DzfYQ6vwDcFBE/7Mh6KnwbuApY316niPhlSdsvw5PAZODBND0JeKJlZkScurkrjIibO6Y062geQRgAkvaStFjS5cACYHD65mr/NP9USYvSX9xXp7bdJP1B0nxJ8yQdnNovkDRL0v2SnpZ0WmofkkYBC9O2Dmmjlp6SfiPpydTvm63m90h/vU9P002S+le8hislLZF0Z8tf8JltHEvxDfKvSbo3tf1zWn6xpG9kltlO0q8kLZV0GzConffzn4Bdgf9sWX9q/1F6D/8iadeK9+tb6fk+kv6U+ixoPbKRdFBLe1ruSkkPSvqrpKkV/U5Ov5OFqebt2npfJf1Tek1PSPptW68peQA4JK1rJ2AY8P7NfFpGeZuzrTQyuTg9/62kX0j6r/Sajk/tPSRdnn6vt0m6S+2M3qxjeARhlUYCp0bE1wCKgQRIGg18FzgkIl6VNCD1vwT4SUQ8nD7Ibgc+mebtBxwC7AQskPQfwFeA2yLixypuVNS3jToOAAZFxH5p+/0r5vUEfgcsiIgfZ5bdF5gcEU9K+gMwkeIeFx8QEXMkjQVeiYiL0/MvU9wfowcwT9KDwNKKxb4EjEivcfc07/LcC4iIiyR9BzgsIl6X1BPYGXgwIs6R9HPgNOBHrRadDUyPiNtSuG0H7JXeh8OAi4BjI6Ip/X72AY4C+gNP
qQj4TwDHU/y+NkiaSfGX/jNtvK//DOwREe+0eq9z3qMIiaOB3YBb0vZaa+t3WM22dgU+TfFv6EbgZuAEisvQ7wf8DcVlYbLvvXUcjyCs0jMR8Wim/Ujghoh4FaDlJ8WHxOWSFlJ8UOwiqeVD/5aIWB8Rq4C5wIEUFwY8Q9J5wCcj4s026lhOce2dX0gaB6ytmHclbYcDFDc/ejI9fwwYvonX3OIw4PcRsS4i3kiv59BWfT4DzI6I9yKiieKDcnO8FRF3tlWbpF0oPlRvA0jv37o0+5PAr4C/T9tucXtEvJPe51eBOorfy4HA/PS7+SywJ22/r0uA36o4DvJuFa/jeorAmUQmfJOPsq1borCIjfcmOZTiOmHvRcRKNu7ishI5IKzS/7bRLvL3iBAwNiIa0mNIRLyV5rXuHxHxJ+Bw4EXgOrVxYDYdJ/k74CHgm8C/Vcz+M3CUpO3bqPXtiufNVD9Kzt0XI1telf1y3ql43lZtba1/ZVq+9UH63OsVcFXF72XfiPhhO+/rOIq/xsdShEqPTbyOvwD7AztFxDO5Dh9xW5WvSa1+WidyQFg17gUmtexaqtjFdC9Qud+78sNroqTtJQ2i+Ot8vqQ9gJciYibFvYbH5DYmqY7iIPm/A+dRfBi1mJm2e33abdNR5gLHS+oraUeK267+Z6bPpLQ/fwjFX+bteQOo+uyqiHgNeEXSMQCS+kjaIc1+Ffh74CdpV1N77gVOTO99y5lrw3Lva/qArk/hPY1iBLJDWytOdQbwPeD7bfXpqG1VeAj4kgqDKUZzVjIfg7BNiohFkn4CzJW0gWL3yOkU4XCZpFMp/i3dz8bAeBS4k+JmROdFxMsqDlZ/W9K7wJsUxyRyhgJXqtjJHhTHPyrr+YmkC4FrJJ3UQa9xnopLS7fsYrssHceo/D9yE3AExUHZZRSB0Z6ZwL2SVgDjqyzly8C/pdf3DvDFihpfVHFw/Y72Xneq+/y07e0oduV8jWKE0fp97Qn8TsVpwtsBP0672NoVEf+xiS6532F2Wy3HujbhRopdnS3v/SN8cNejlcCX+7YOJ+kC0sHfWtdi2w5JO0bEm2l08ghwUESsrnVd2zKPIMxsa3FnOrW2F8Wo1OFQMo8grKYkzefDf6j8Q0QszfXfwm1cDhzcqvnnEXFtB61/DsX3ASqdHRH35vp3dZLOoPiOSKW5EfHNXH/bdjkgzMwsy2cxmZlZlgPCzMyyHBBmZpblgDAzsywHhJmZZf1/ghDaRNNOWUIAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "%matplotlib inline\n",
    "# Diabetes outcome counts (Target) for rows with vs. without a skin-fold measurement\n",
    "sns.countplot(x=\"Triceps_skin_fold_thickness_Missing\", hue=\"Target\",data=train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x26ee310f3c8>"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAELCAYAAADDZxFQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFWVJREFUeJzt3XuwnHWd5/H311wmXLJAQmAIB0hAYCYRk8Ah6CquAjXEOHLxwpBaFQQJM4suTg3UMK4FMWN2nBoVZZhhC5AlzCKX9QKBTQUwNZjSFUPAGEIYBIUlh1tCxAgTAuTw3T/6OdCEX5JOOM/pPjnvV1VX9/Pr3+/3fLvr1Pn0c+mnIzORJGlz72h3AZKkzmRASJKKDAhJUpEBIUkqMiAkSUUGhCSpyICQJBUZEJKkIgNCklQ0vN0FvB177713Tpgwod1lSNKgct999z2XmeO21W9QB8SECRNYtmxZu8uQpEElIv5fK/3cxSRJKjIgJElFBoQkqWhQH4OQpHZ59dVX6enpYePGje0uZYtGjRpFV1cXI0aM2KHxBoQk7YCenh5Gjx7NhAkTiIh2l/MWmcm6devo6elh4sSJOzSHu5gkaQds3LiRsWPHdmQ4AEQEY8eOfVtbOAaEJO2gTg2HPm+3PgNCklTkMQhJ6gfr1q3j+OOPB+CZZ55h2LBhjBvX+LLy0qVLGTlyZL+v8/7772fNmjXMmDGj3+cGA4KjLryu3SV0jPv+4TPtLkEatMaOHcvy5csBmDNnDrvvvjsXXHBBy+N7e3sZNmzYdq3z/vvvZ+XKlbUFhLuYJKlmH/3oRznqqKOYPHkyV199NQCbNm1izz335Mtf/jLTp09n6dKlLFiwgMMPP5xjjz2WL3zhC5xyyikAvPjii5x55plMnz6dadOmcdttt/HSSy8xd+5crr/+eqZOncr3vve9fq97yG9BSFLd5s+fz5gxY9iwYQPd3d18/OMfZ/To0axfv54jjzySr371q2zYsIHDDjuMn/70pxx44IGcdtppr4+fO3cuM2bM4Nprr+X555/nmGOOYcWKFVx88cWsXLmSb33rW7XU7RaEJNXs0ksvZcqUKbz3ve+lp6eHX//61wCMHDmSU089FYBVq1Zx+OGHc9BBBxERzJo16/Xxd955J/PmzWPq1Kl86EMfYuPGjTzxxBO11+0WhCTV6Ec/+hFLlizhnnvuYZddduH973//699N2GWXXV4/FTUztzhHZnLLLbdwyCGHvKl9yZIl9RWOWxCSVKv169czZswYdtllFx588EHuvffeYr/Jkyfz8MMPs3r1ajKTm2666fXnTjzxRC677LLXl3/xi18AMHr0aF544YXaajcgJKlGH/nIR9iwYQNTpkxh7ty5HHPMMcV+u+66K5dffjknnHACxx57LOPHj2ePPfYA4JJLLmHDhg0cccQRTJ48mTlz5gBw3HHH8ctf/pJp06YNroPUEXEAcB3wh8BrwJWZ+e2ImAOcA6ytun4pMxdWY/4GOBvoBf5rZt5RV32SVJe+f+DQuGDeHXeU/5X97ne/e9PyCSecwMMPP0xmcu6559Ld3Q3AbrvtxlVXXfWW8ePGjav1R9PqPAaxCfirzLw/IkYD90XEXdVzl2bm15s7R8Qk4HRgMjAe+FFEHJaZvTXWKEkd44orruD666/n5Zdfpru7m3POOaet9dQWEJn5NPB09fiFiHgI2H8rQ04GbszMl4HHIuJRYDrws7pqlKROcuGFF3LhhRe2u4zXDcgxiIiYAEwDfl41fT4iVkTENRGxV9W2P7C6aVgPWw8USVKNag+IiNgd+D7wxcz8PXAFcAgwlcYWxjf6uhaGv+W8r4iYHRHLImLZ2rVrC0MkSf2h1oCIiBE0wuH6zPwBQGY+m5m9mfkacBWN3UjQ2GI4oGl4F/DU5nNm5pWZ2Z2Z3X0XwpIk9b/aAiIa3/74DvBQZn6zqX2/pm6n
AiurxwuA0yPiDyJiInAosLSu+iRJW1fnWUzvAz4NPBARy6u2LwGzImIqjd1HjwPnAmTmgxFxM7CKxhlQ53kGk6TBor+vDN3K1ZUXLVrE+eefT29vL5/73Oe46KKL+rWGOs9i+gnl4woLtzJmHjCvrpokaWfR29vLeeedx1133UVXVxdHH300J510EpMmTeq3dfhNakkahJYuXco73/lODj74YEaOHMnpp5/Orbfe2q/rMCAkaRB68sknOeCAN87r6erq4sknn+zXdRgQkjQIla7+2ndl2P5iQEjSINTV1cXq1W98t7inp4fx48f36zoMCEkahI4++mgeeeQRHnvsMV555RVuvPFGTjrppH5dhz8YJEn9oJXTUvvT8OHDufzyyznxxBPp7e3lrLPOYvLkyf27jn6dTVK/6e/z6gezgf7nO1jMnDmTmTNn1ja/u5gkSUUGhCSpyICQJBUZEJKkIgNCklRkQEiSijzNVZL6wRNzj+jX+Q68+IFt9jnrrLO4/fbb2WeffVi5cuU2+28vtyAkaZA688wzWbRoUW3zGxCSNEh94AMfYMyYMbXNb0BIkooMCElSkQEhSSoyICRJRZ7mKkn9oJXTUvvbrFmzuPvuu3nuuefo6uriK1/5CmeffXa/zW9ASNIgdcMNN9Q6v7uYJElFBoQkqciAkKQdlJntLmGr3m59BoQk7YBRo0axbt26jg2JzGTdunWMGjVqh+fwILUk7YCuri56enpYu3Ztu0vZolGjRtHV1bXD4w0ISdoBI0aMYOLEie0uo1buYpIkFRkQkqSi2gIiIg6IiH+NiIci4sGIOL9qHxMRd0XEI9X9XlV7RMRlEfFoRKyIiCPrqk2StG11bkFsAv4qM/8YeA9wXkRMAi4CFmfmocDiahngw8Ch1W02cEWNtUmStqG2gMjMpzPz/urxC8BDwP7AycD8qtt84JTq8cnAddlwD7BnROxXV32SpK0bkGMQETEBmAb8HNg3M5+GRogA+1Td9gdWNw3rqdokSW1Qe0BExO7A94EvZubvt9a10PaWb6BExOyIWBYRyzr5/GNJGuxqDYiIGEEjHK7PzB9Uzc/27Tqq7tdU7T3AAU3Du4CnNp8zM6/MzO7M7B43blx9xUvSEFfnWUwBfAd4KDO/2fTUAuCM6vEZwK1N7Z+pzmZ6D7C+b1eUJGng1flN6vcBnwYeiIjlVduXgK8BN0fE2cATwCer5xYCM4FHgQ3AZ2usTZK0DbUFRGb+hPJxBYDjC/0TOK+ueiRJ28dvUkuSigwISVKRASFJKjIgJElFBoQkqciAkCQVGRCSpCIDQpJUZEBIkooMCElSkQEhSSoyICRJRQaEJKnIgJAkFRkQkqQiA0KSVGRASJKKDAhJUpEBIUkqMiAkSUUGhCSpyICQJBUZEJKkIgNCklRkQEiSigwISVKRASFJKjIgJElFBoQkqciAkCQV1RYQEXFNRKyJiJVNbXMi4smIWF7dZjY99zcR8WhEPBwRJ9ZVlySpNS0FREQsbqVtM9cCMwrtl2bm1Oq2sJprEnA6MLka888RMayV2iRJ9dhqQETEqIgYA+wdEXtFxJjqNgEYv7WxmbkE+G2LdZwM3JiZL2fmY8CjwPQWx0qSarCtLYhzgfuAP6ru+263Av+0g+v8fESsqHZB7VW17Q+sburTU7VJktpkqwGRmd/OzInABZl5cGZOrG5TMvPyHVjfFcAhwFTgaeAbVXuUVl+aICJmR8SyiFi2du3aHShBktSK4a10ysx/jIj/CExoHpOZ123PyjLz2b7HEXEVcHu12AMc0NS1C3hqC3NcCVwJ0N3dXQwRSdLb11JARMS/0PjkvxzorZoT2K6AiIj9MvPpavFUoO8MpwXAdyPimzSObRwKLN2euSVJ/aulgAC6gUmZ2fIn9oi4AfggjQPcPcAlwAcjYiqNcHmcxjEOMvPBiLgZWAVsAs7LzN7SvJKkgdFqQKwE/pDGcYOWZOasQvN3ttJ/HjCv1fkl
SfVqNSD2BlZFxFLg5b7GzDyplqokSW3XakDMqbMISVLnafUsph/XXYgkqbO0ehbTC7zxvYSRwAjg3zPzP9RVmCSpvVrdghjdvBwRp+ClMCRpp9bqMYg3ycxbIuKi/i5GkkqemHtEu0voGAde/MCAravVXUwfa1p8B43vRfgtZknaibW6BfHRpsebaHzJ7eR+r0aS1DFaPQbx2boLkSR1llZ/MKgrIn5Y/ULcsxHx/Yjoqrs4SVL7tPqTo/+TxgX1xtP4nYbbqjZJ0k6q1WMQ4zKzORCujYgv1lGQ2sczRd4wkGeKSJ2q1S2I5yLiUxExrLp9ClhXZ2GSpPZqNSDOAk4DnqFxRddPAB64lqSdWKu7mP4WOCMznweIiDHA12kEhyRpJ9TqFsS7+8IBIDN/C0yrpyRJUidoNSDeERF79S1UWxA7dJkOSdLg0Oo/+W8A/zcivkfjEhun4a+/SdJOrdVvUl8XEcuA44AAPpaZq2qtTJLUVi3vJqoCwVCQpCGi1WMQkqQhxoCQJBUZEJKkIgNCklRkQEiSigwISVKRASFJKjIgJElFBoQkqciAkCQVGRCSpKLaAiIiromINRGxsqltTETcFRGPVPd7Ve0REZdFxKMRsSIijqyrLklSa+rcgrgWmLFZ20XA4sw8FFhcLQN8GDi0us0GrqixLklSC2oLiMxcAvx2s+aTgfnV4/nAKU3t12XDPcCeEbFfXbVJkrZtoI9B7JuZTwNU9/tU7fsDq5v69VRtbxERsyNiWUQsW7t2ba3FStJQ1ikHqaPQlqWOmXllZnZnZve4ceNqLkuShq6BDohn+3YdVfdrqvYe4ICmfl3AUwNcmySpyUAHxALgjOrxGcCtTe2fqc5meg+wvm9XlCSpPVr+ydHtFRE3AB8E9o6IHuAS4GvAzRFxNvAE8Mmq+0JgJvAosAH4bF11SZJaU1tAZOasLTx1fKFvAufVVYskaft1ykFqSVKHMSAkSUUGhCSpyICQJBUZEJKkIgNCklRkQEiSigwISVKRASFJKjIgJElFBoQkqciAkCQVGRCSpCIDQpJUZEBIkooMCElSkQEhSSoyICRJRQaEJKnIgJAkFRkQkqQiA0KSVGRASJKKDAhJUpEBIUkqMiAkSUUGhCSpyICQJBUZEJKkIgNCklQ0vB0rjYjHgReAXmBTZnZHxBjgJmAC8DhwWmY+3476JEnt3YL4UGZOzczuavkiYHFmHgosrpYlSW3SSbuYTgbmV4/nA6e0sRZJGvLaFRAJ3BkR90XE7Kpt38x8GqC636dNtUmSaNMxCOB9mflUROwD3BUR/9bqwCpQZgMceOCBddUnSUNeW7YgMvOp6n4N8ENgOvBsROwHUN2v2cLYKzOzOzO7x40bN1AlS9KQM+ABERG7RcTovsfAnwArgQXAGVW3M4BbB7o2SdIb2rGLaV/ghxHRt/7vZuaiiLgXuDkizgaeAD7ZhtokSZUBD4jM/A0wpdC+Djh+oOuRJJV10mmukqQOYkBIkooMCElSkQEhSSoyICRJRQaEJKnIgJAkFRkQkqQiA0KSVGRASJKKDAhJUpEBIUkqMiAkSUUGhCSpyICQJBUZEJKkIgNCklRkQEiSigwISVKRASFJKjIgJElFBoQkqciAkCQVGRCSpCIDQpJUZEBIkooMCElSkQEhSSoyICRJRQaEJKmo4wIiImZExMMR8WhEXNTueiRpqOqogIiIYcA/AR8GJgGzImJSe6uSpKGpowICmA48mpm/ycxXgBuBk9tckyQNSZ0WEPsDq5uWe6o2SdIAG97uAjYThbZ8U4eI2cDsavHFiHi49qqGiINgb+C5dtfRES4p/SmqXfzbbNI/f5sHtdKp0wKiBzigabkLeKq5Q2ZeCVw5kEUNFRGxLDO7212HtDn/Ntuj03Yx3QscGhETI2IkcDqwoM01SdKQ1FFbEJm5KSI+D9wBDAOuycwH21yWJA1JHRUQAJm5EFjY7jqGKHfdqVP5t9kGkZnb7iVJ
GnI67RiEJKlDGBDy8ibqWBFxTUSsiYiV7a5lKDIghjgvb6IOdy0wo91FDFUGhLy8iTpWZi4BftvuOoYqA0Je3kRSkQGhbV7eRNLQZEBom5c3kTQ0GRDy8iaSigyIIS4zNwF9lzd5CLjZy5uoU0TEDcDPgMMjoicizm53TUOJ36SWJBW5BSFJKjIgJElFBoQkqciAkCQVGRCSpCIDQpJUZEBIlYj484j4TD/PeW1EfKJ6fPWOXCk3IuZEREbEO5va/rJq666WF0bEnts5b7+/Xu1cOu4nR6VtiYjh1Rf8+lVm/o/+nnOz+T/3NoY/QONb7l+tlj8BrGqae+YO1FPr69Xg5xaE2iYidouI/xMRv4yIlRHxZxFxVET8OCLui4g7ImK/qu/dEfHfI+LHwPnNn8yr51+s7j9Yjb85In4VEV+LiP8cEUsj4oGIOGQr9cyJiAua1vf31bhfRcSxVfvkqm15RKyIiEMjYkLzD9pExAURMacw/91Nn/hfjIh51Wu/JyL23cbbdQvVZdgj4mBgPbC2ae7HI2Lv0ntaPf+1iFhV1fz17Xi9u1bv5YqIuCkift73GrTzMyDUTjOApzJzSma+C1gE/CPwicw8CrgGmNfUf8/M/E+Z+Y1tzDsFOB84Avg0cFhmTgeuBr6wHfUNr8Z9Ebikavtz4NuZORXopnGxwx2xG3BPZk4BlgDnbKP/74HVEfEuYBZw0xb6veU9jYgxwKnA5Mx8N29shWyu9Hr/C/B8Ne5vgaNae3naGRgQaqcHgBOqT67H0riq7LuAuyJiOfBlGleX7bOlf4qbuzczn87Ml4FfA3c2rW/CdtT3g+r+vqZxPwO+FBF/DRyUmS9tx3zNXgFuL8y/NTfS2M10CvDDLfR503uametphMtG4OqI+BiwYQtjS6/3/dV6ycyVwIoW6tROwoBQ22Tmr2h8In0A+Dvg48CDmTm1uh2RmX/SNOTfmx5vovr7jYgARjY993LT49eall9j+4679Y3r7RuXmd8FTgJeAu6IiOOaa6mMamHuV/ONC6G9Pv823EZji+iJzPx9qcPm72lEXFwdr5kOfJ9GuCzawvxveb2Ufy9EQ4QBobaJiPHAhsz8X8DXgWOAcRHx3ur5ERExeQvDH+eN3R0nAyNqLpeqpoOB32TmZTQui/5u4Flgn4gYGxF/APxpHeuutlb+mjfvdtu8vs3f0yMjYndgj8xcSGP30dTtWO1PgNOquSfR2G2nIcKzmNRORwD/EBGvAa8Cf0Hj0/hlEbEHjb/PbwGly49fBdwaEUuBxbx566JOfwZ8KiJeBZ4B5mbmqxExF/g58Bjwb3WtPDNv3EaX0ns6msZ7NYrGFsFfbscq/xmYHxErgF/Q2MW0frsL16Dk5b4lbVFEDANGZObG6gywxTQO+r/S5tI0ANyCkLQ1uwL/GhEjaGx9/IXhMHS4BaEhJyL+G/DJzZr/d2Zucd/+QOjUujR0GRCSpCLPYpIkFRkQkqQiA0KSVGRASJKKDAhJUtH/BzWoxajWghiZAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Insulin has many missing values, so add a 0/1 indicator column marking the rows that lack a measurement.\n",
    "# Vectorised null test instead of a row-wise apply; int64 matches the dtype the apply version produced.\n",
    "train['serum_insulin_Missing'] = train['serum_insulin'].isnull().astype('int64')\n",
    "# Compare the Target class counts between rows with and without an insulin value.\n",
    "sns.countplot(x=\"serum_insulin_Missing\", hue=\"Target\", data=train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "不过特征是否缺失好像和目标也没什么关系"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rebind instead of mutating in place, and tolerate already-removed columns so this cell can be re-run safely.\n",
    "train = train.drop(columns=[\"Triceps_skin_fold_thickness_Missing\", \"serum_insulin_Missing\"], errors=\"ignore\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "感觉特征缺失是随机的，将这新增的特征删除，老实用中值填补算了。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "pregnants                       0\n",
      "Plasma_glucose_concentration    0\n",
      "blood_pressure                  0\n",
      "Triceps_skin_fold_thickness     0\n",
      "serum_insulin                   0\n",
      "BMI                             0\n",
      "Diabetes_pedigree_function      0\n",
      "Age                             0\n",
      "Target                          0\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# Fill every remaining NaN with the median of its own column.\n",
    "medians = train.median()\n",
    "train = train.fillna(value=medians)\n",
    "\n",
    "# Per-column count of values that are still missing after imputation.\n",
    "print(train.isnull().sum(axis=0))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 数据标准化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\data.py:645: DataConversionWarning: Data with input dtype int64, float64 were all converted to float64 by StandardScaler.\n",
      "  return self.partial_fit(X, y)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\base.py:464: DataConversionWarning: Data with input dtype int64, float64 were all converted to float64 by StandardScaler.\n",
      "  return self.fit(X, **fit_params).transform(X)\n"
     ]
    }
   ],
   "source": [
    "# Separate the label column from the feature columns.\n",
    "y_train = train['Target']\n",
    "X_train = train.drop(columns=[\"Target\"])\n",
    "\n",
    "# Keep the feature names; they are used to rebuild a labelled DataFrame after scaling.\n",
    "feat_names = X_train.columns\n",
    "\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "# Scaler that standardizes each feature column.\n",
    "ss_X = StandardScaler()\n",
    "\n",
    "# Only training data exists here, so the scaler is fitted and applied in one step.\n",
    "# The explicit float64 cast avoids the DataConversionWarning raised for the int64 columns.\n",
    "# NOTE(review): the scaler sees the full data set before cross-validation, so fold statistics leak; consider a Pipeline.\n",
    "X_train = ss_X.fit_transform(X_train.astype('float64'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 特征处理结果存为文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Re-wrap the scaled values as a DataFrame. Reusing the label's index guarantees that the\n",
    "# concat below pairs every feature row with its own label even if the index is not 0..n-1.\n",
    "X_train = pd.DataFrame(data=X_train, columns=feat_names, index=y_train.index)\n",
    "\n",
    "train = pd.concat([X_train, y_train], axis=1)\n",
    "\n",
    "# Save the engineered features together with the label in CSV format.\n",
    "train.to_csv('FE_pima-indians-diabetes.csv', index=False, header=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pregnants</th>\n",
       "      <th>Plasma_glucose_concentration</th>\n",
       "      <th>blood_pressure</th>\n",
       "      <th>Triceps_skin_fold_thickness</th>\n",
       "      <th>serum_insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>Diabetes_pedigree_function</th>\n",
       "      <th>Age</th>\n",
       "      <th>Target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.639947</td>\n",
       "      <td>0.866045</td>\n",
       "      <td>-0.031990</td>\n",
       "      <td>0.670643</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>0.166619</td>\n",
       "      <td>0.468492</td>\n",
       "      <td>1.425995</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.844885</td>\n",
       "      <td>-1.205066</td>\n",
       "      <td>-0.528319</td>\n",
       "      <td>-0.012301</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>-0.852200</td>\n",
       "      <td>-0.365061</td>\n",
       "      <td>-0.190672</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.233880</td>\n",
       "      <td>2.016662</td>\n",
       "      <td>-0.693761</td>\n",
       "      <td>-0.012301</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>-1.332500</td>\n",
       "      <td>0.604397</td>\n",
       "      <td>-0.105584</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.844885</td>\n",
       "      <td>-1.073567</td>\n",
       "      <td>-0.528319</td>\n",
       "      <td>-0.695245</td>\n",
       "      <td>-0.540642</td>\n",
       "      <td>-0.633881</td>\n",
       "      <td>-0.920763</td>\n",
       "      <td>-1.041549</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-1.141852</td>\n",
       "      <td>0.504422</td>\n",
       "      <td>-2.679076</td>\n",
       "      <td>0.670643</td>\n",
       "      <td>0.316566</td>\n",
       "      <td>1.549303</td>\n",
       "      <td>5.484909</td>\n",
       "      <td>-0.020496</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pregnants  Plasma_glucose_concentration  blood_pressure  \\\n",
       "0   0.639947                      0.866045       -0.031990   \n",
       "1  -0.844885                     -1.205066       -0.528319   \n",
       "2   1.233880                      2.016662       -0.693761   \n",
       "3  -0.844885                     -1.073567       -0.528319   \n",
       "4  -1.141852                      0.504422       -2.679076   \n",
       "\n",
       "   Triceps_skin_fold_thickness  serum_insulin       BMI  \\\n",
       "0                     0.670643      -0.181541  0.166619   \n",
       "1                    -0.012301      -0.181541 -0.852200   \n",
       "2                    -0.012301      -0.181541 -1.332500   \n",
       "3                    -0.695245      -0.540642 -0.633881   \n",
       "4                     0.670643       0.316566  1.549303   \n",
       "\n",
       "   Diabetes_pedigree_function       Age  Target  \n",
       "0                    0.468492  1.425995       1  \n",
       "1                   -0.365061 -0.190672       0  \n",
       "2                    0.604397 -0.105584       1  \n",
       "3                   -0.920763 -1.041549       0  \n",
       "4                    5.484909 -0.020496       1  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the processed frame back into the label series and the feature matrix.\n",
    "y_train = train['Target']\n",
    "X_train = train.drop(columns=['Target'])\n",
    "\n",
    "# Remember the feature names for later use (visualisation).\n",
    "feat_names = X_train.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "默认参数的Logistic Regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "logloss of each fold is:  [0.48797856 0.53011593 0.4562292  0.422546   0.48392885]\n",
      "cv logloss is: 0.47615970944434044\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.model_selection import cross_val_score\n",
    "\n",
    "# Name the solver explicitly: liblinear was the default before scikit-learn 0.22, so the scores\n",
    "# are unchanged, the FutureWarning disappears, and newer versions reproduce the same result.\n",
    "lr = LogisticRegression(solver='liblinear')\n",
    "\n",
    "# Cross-validation is used both to estimate performance and to tune hyper-parameters (model selection).\n",
    "# For classifiers an integer cv means StratifiedKFold; 5 folds are used here.\n",
    "# The scorer returns the negated log-loss (higher is better), so the sign is flipped when reporting.\n",
    "loss = cross_val_score(lr, X_train, y_train, cv=5, scoring='neg_log_loss')\n",
    "print ('logloss of each fold is: ',-loss)\n",
    "print ('cv logloss is:', -loss.mean())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Logistic Regression + GridSearchCV"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Logistic回归需要调整的超参数有：C（正则强度的倒数，一般在log域（取log后的值）均匀设置候选参数）和正则函数penalty（L2/L1）。目标函数为：J = C * sum(logloss(f(xi), yi)) + penalty\n",
    "\n",
    "在sklearn框架下，不同学习器的参数调整步骤相同：\n",
    "\n",
    "1. 设置参数搜索范围\n",
    "2. 生成学习器实例（参数设置）\n",
    "3. 生成GridSearchCV的实例（参数设置）\n",
    "4. 调用GridSearchCV的fit方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=5, error_score='raise-deprecating',\n",
       "       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
       "          intercept_scaling=1, max_iter=100, multi_class='warn',\n",
       "          n_jobs=None, penalty='l2', random_state=None, solver='liblinear',\n",
       "          tol=0.0001, verbose=0, warm_start=False),\n",
       "       fit_params=None, iid='warn', n_jobs=4,\n",
       "       param_grid={'penalty': ['l1', 'l2'], 'C': [0.1, 1, 10, 100, 1000]},\n",
       "       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n",
       "       scoring='neg_log_loss', verbose=0)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "# Hyper-parameters to tune: the penalty type and the value of C.\n",
    "# Exercise: try tuning the L1 and L2 penalties separately, each paired with a suitable solver.\n",
    "penaltys = ['l1', 'l2']\n",
    "Cs = [0.1, 1, 10, 100, 1000]\n",
    "tuned_parameters = {'penalty': penaltys, 'C': Cs}\n",
    "\n",
    "# Exhaustive 5-fold search over the grid, scored by negated log-loss, using 4 parallel jobs.\n",
    "lr_penalty = LogisticRegression(solver='liblinear')\n",
    "grid = GridSearchCV(\n",
    "    estimator=lr_penalty,\n",
    "    param_grid=tuned_parameters,\n",
    "    scoring='neg_log_loss',\n",
    "    cv=5,\n",
    "    n_jobs=4,\n",
    ")\n",
    "grid.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.4760269461670293\n",
      "{'C': 1, 'penalty': 'l1'}\n"
     ]
    }
   ],
   "source": [
    "# Best cross-validated log-loss (sign flipped back from the neg_log_loss scorer), then the winning parameters.\n",
    "print(-grid.best_score_, grid.best_params_, sep='\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "以正确率（accuracy）作为评价指标调优超参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "accuracy of each fold is:  [0.75974026 0.74025974 0.78571429 0.79738562 0.77124183]\n",
      "cv accuracy is: 0.7708683473389355\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    }
   ],
   "source": [
    "# Baseline: the same `lr` model, cross-validated with accuracy as the metric.\n",
    "# Note that loss2 holds per-fold accuracies, not losses.\n",
    "loss2 = cross_val_score(estimator=lr, X=X_train, y=y_train, scoring='accuracy', cv=5)\n",
    "print('accuracy of each fold is: ', loss2)\n",
    "print('cv accuracy is:', loss2.mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=5, error_score='raise-deprecating',\n",
       "       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
       "          intercept_scaling=1, max_iter=100, multi_class='warn',\n",
       "          n_jobs=None, penalty='l2', random_state=None, solver='liblinear',\n",
       "          tol=0.0001, verbose=0, warm_start=False),\n",
       "       fit_params=None, iid='warn', n_jobs=4,\n",
       "       param_grid={'penalty': ['l1', 'l2'], 'C': [0.1, 1, 10, 100, 1000]},\n",
       "       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n",
       "       scoring='accuracy', verbose=0)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same penalty/C grid as the log-loss search, but model selection is driven by accuracy.\n",
    "tuned_parameters2 = dict(penalty=penaltys, C=Cs)\n",
    "\n",
    "lr_penalty2 = LogisticRegression(solver='liblinear')\n",
    "# Pass the estimator and grid defined in this cell, not the ones left over from the log-loss search.\n",
    "grid2 = GridSearchCV(lr_penalty2, tuned_parameters2, cv=5, scoring='accuracy', n_jobs=4)\n",
    "grid2.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.7747395833333334\n",
      "{'C': 0.1, 'penalty': 'l2'}\n"
     ]
    }
   ],
   "source": [
    "# Best mean cross-validated accuracy, then the parameters that achieved it.\n",
    "print(grid2.best_score_, grid2.best_params_, sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.svm import LinearSVC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import cross_val_score\n",
    "def fit_grid_point_Linear2(penalty, C, X_train, y_train, cv=5, max_iter=1000):\n",
    "    \"\"\"Cross-validate a LinearSVC for a single (penalty, C) grid point.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    penalty : 'l1' or 'l2', forwarded to LinearSVC.\n",
    "    C : value of LinearSVC's C parameter.\n",
    "    X_train, y_train : features and labels used for cross-validation.\n",
    "    cv : cross-validation setting forwarded to cross_val_score (default 5).\n",
    "    max_iter : iteration cap forwarded to LinearSVC (default 1000, the library default);\n",
    "        raise it when ConvergenceWarning is emitted.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The mean of the per-fold scores (the estimator's default score, i.e. accuracy).\n",
    "    The same value is also printed together with the grid point.\n",
    "    \"\"\"\n",
    "    svc_kwargs = dict(penalty=penalty, C=C, max_iter=max_iter)\n",
    "    if penalty == 'l1':\n",
    "        # LinearSVC supports the L1 penalty only in the primal formulation.\n",
    "        svc_kwargs['dual'] = False\n",
    "    SVC2 = LinearSVC(**svc_kwargs)\n",
    "\n",
    "    # cross_val_score clones and fits the estimator on every fold, so a separate fit\n",
    "    # on the full training set beforehand would be wasted work.\n",
    "    mean_score = cross_val_score(SVC2, X_train, y_train, cv=cv).mean()\n",
    "\n",
    "    print(\"penalty = {} , C= {} : score.mean()= {} \" .format(penalty, C, mean_score))\n",
    "    return mean_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "penalty = l2 , C= 0.1 : score.mean()= 0.7708598590951532 \n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "penalty = l2 , C= 1.0 : score.mean()= 0.7721670486376369 \n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "penalty = l2 , C= 10.0 : score.mean()= 0.7656650539003479 \n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "penalty = l2 , C= 100.0 : score.mean()= 0.7396825396825396 \n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n",
      "D:\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
      "  \"the number of iterations.\", ConvergenceWarning)\n",
      "No handles with labels found to put in legend.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "penalty = l2 , C= 1000.0 : score.mean()= 0.6654019183430948 \n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEKCAYAAADjDHn2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3XucVVXdx/HPb7gNICqX8QIiYIGKN8QJTcXEUjF9UCtRzNJKzQrDyhuKiBClYFmYllj6euwiKZJiYYiXvJSogxcUkIv4hCOGgEISIAi/54+1Jw5zO3tmzj77zJnv+/U6L87ZZ+053znMnN/stfday9wdERGR+pSkHUBERAqfioWIiGSlYiEiIlmpWIiISFYqFiIikpWKhYiIZKViISIiWalYiIhIVioWIiKSVeu0A+RKt27dvHfv3mnHEBFpVubNm7fG3cuytSuaYtG7d28qKirSjiEi0qyY2T/jtFM3lIiIZKViISIiWalYiIhIVkVzzkJEpKXbunUrlZWVbN68ucZzpaWl7LPPPrRp06ZRX1vFQkSkSFRWVtKpUyd69+6Nmf13u7uzdu1aKisr6dOnT6O+trqhRESKxObNm+natetOhQLAzOjatWutRxxxqViIiBSR6oUi2/a41A3VArnD1q2waRNs3lzzVtf2quc++gi6d4cDDoADD4Ru3aCJP4ciUuBULFKyfXvDP6Rz+Vwul17v0iUUjariUfVvr17QqlXuXkdE0tPii8X27bBmTf4+pKue27q1ablLSqC0FNq3D/9Wv7VvHz7Ea3uuvv3iPNemDVRWwhtvwKJFO/59+GH4zW92ZCwthX79ahaRfv3C1xGR3HP3WrucvIl/Ibb4YrF6Ney1V+P2bdu2/g/Zzp1z8+Fc2/bWrdPt+unVK9xOPnnn7e+/X7OIzJsH06eHwgwhd+/eNYvIAQeELi0RaZzS0lLWrl1b4yR31dVQpaWljf7a1tRqUyjKy8u9MXNDbd4c/hpu6Id3u3bhr3uJZ/NmWLp05yLyxhuweHE40qrSrVvtRaRXL73fItk0ZpyFmc1z9/JsX7vFFwtJ1/btsGJFzSKyaFHoHqxSWgr771+ziPTrF54TkcaJWyxafDeUpKukJHRJ9e4Np5yy83Nr1oTCkVlEnn8e/vjHHSfozaBPn9pPsHfpku/vRqR4qVhIwerWDY49NtwybdoES5bUPBJ57LFwWW+VsrLai0jPnurSEmkoFQtpdtq3h8MOC7dM27bBP/9Zs4hMnx5OvFfp0CF0aVUvIn37hnNRIlKTzllIi7B6dc0i8sYb8H//t6NNSQnst1/tJ9g7d04tukiidM5CJENZWbgNHrzz9o0bQ5dWZhFZtAgefRS2bNnRbs89ay8iPXtq9Lq0DCoW0qJ16AADBoRbpm3b4K23ah6NTJsG69btaNexY+1XafXtG8bhiBQLdUOJNIA7vPde7V1aK1bsaNeqVejSqioew4fDEUekl1ukLhpnIZJnGzbU7NJ6442wbds2uPRSmDABOnVKO6nIDjpnIZJnu+wCAweGW6b16+Gaa2DKFJgxA267Df7nf9LJKNJYutpcJGG77RYKxLPPwq67wrBhcNZZ8O67aScTiU/FQiRPjj4aXnoJfvjDMEPvgQfCHXfsmGBRpJCpWIjkUdu2cO21MH9+6K665BI47jhYuDDtZCL1U7EQSUG/fvD443D33eFE+IABcP31YXZekUKkYiGSEjO44IJQLIYPh/HjQ9F46qm0k4nUlGixMLOhZrbYzJaZ2dW1PH+Lmb0S3ZaY2bqM5/Y1s0fNbJGZLTSz3klmFUnLHnvA734Hf/1rGDV+/PFw0UXwwQdpJxPZIbFiYWatgNuAU4D+wAgz65/Zxt2/5+4D3H0AcCswI+Ppe4DJ7n4gMAh4L6msIoXg5JPh9dfhyitD99QBB4QR40UyFEqauSSPLAYBy9x9ubtvAaYBp9fT
fgRwL0BUVFq7+xwAd9/g7hsTzCpSEDp0gJtugooK2HdfGDECTj115wkPRdKQZLHoAbyd8bgy2laDmfUC+gBPRJv6AevMbIaZvWxmk6MjFZEWYcAAmDsXfvYzePppOOgg+OlP4eOP004mLVWSxaK2uTjrOqA+B5ju7tuix62BwcDlwKeA/YALaryA2cVmVmFmFatXr256YpEC0qoVjBoVLqs94QT4wQ/gyCNh3ry0k0lLlGSxqAR6ZjzeB1hZR9tziLqgMvZ9OerC+hh4EBhYfSd3n+ru5e5eXlZWlqPYIoVl331h5ky4/35YuRIGDQqFY8OGtJNJS5JksXgR6GtmfcysLaEgzKzeyMz2BzoDz1Xbt7OZVVWAEwANW5IWywy+9KVwme1FF4UuqYMPhlmz0k4mLUVixSI6IhgJzAYWAfe5+wIzG29mwzKajgCmecb0t1F31OXA42b2GqFL686ksoo0F7vvDr/6FTzzTDgZfuqp4ST4qlVpJ5NipynKRZqpjz6CSZPCXFMdOsDkyfD1r4flYUXiijtFuX6sRJqpdu3guuvCPFOHHhq6p4YMCWtoiOSaioVIM7f//vDkk/DrX4fCcdhhYeqQjz5KO5kUExULkSJQUgLf+EY4qvjiF8OkhIcfHtbQEMkFFQuRIrLnnvCHP4SrpDZuhMGDwzTo69Zl31ekPioWIkXolFNgwYIwHuPOO8NCS/ffr3mmpPFULESKVMeOcPPN8OKL0L17mAZ92DBYsSLtZNIcqViIFLmBA+H558NAvieegP794ec/h23bsu8rUkXFQqQFaN0avve90DV13HFw2WVw1FHwyitpJ5PmQsVCpAXp3Rv+8pewTsaKFVBeHtbP2KgFACQLFQuRFsYMzj47XGb79a+Hkd8HHwyzZ6edTAqZioVIC9W5M0ydGtb8btsWhg6F886D97QmpdRCxUKkhTvuOHj11TCQ7777wmW2d9+ty2xlZyoWIkK7djBuXDjh3b9/6J767GdhyZK0k0mhULEQkf/q3z90S91xB7z0UpigcOJE2LIl7WSSNhULEdlJSQlcfHFYaGnYMBgzJozVeO657PtK8VKxEJFa7b13OIfx8MPw73/DMcfAd74D69ennUzSoGIhIvU67TRYuBBGjQqr9PXvD3/6U9qpJN9ULEQkq112gVtugblzoawMvvAFOOMMqKxMO5nki4qFiMT2qU+FiQknTYJHHw1HGb/4heaZaglULESkQdq0gSuuCPNMHX00XHppOJ8xf37aySRJKhYi0ih9+sAjj8Dvfw/Ll8MRR8Do0bBpU9rJJAkqFiLSaGZw7rnhMtuvfAVuvBEOOQQeeyztZJJrKhYi0mRdu8Jdd4X1MkpK4MQT4fzzYc2atJNJrqhYiEjODBkSzl2MGRPWAj/gALjnHs0zVQxULEQkp0pLYcIEePll6NcvHGGceCIsW5Z2MmkKFQsRScTBB8Ozz8Ltt4fLbQ85JJzT2Lo17WTSGCoWIpKYkhL41rfCCfBTTw1XSx1xRFgTXJoXFQsRSVz37jB9Ojz4ILz/Pnz60/Dd78KHH6adTOJKtFiY2VAzW2xmy8zs6lqev8XMXoluS8xsXbXndzWzd8zsF0nmFJH8OP30MM/UyJFh5Hf//jBzZtqpJI7EioWZtQJuA04B+gMjzKx/Zht3/567D3D3AcCtwIxqX2YC8FRSGUUk/3bdFaZMCVOed+kSCsgXvwgrV6adTOqT5JHFIGCZuy939y3ANOD0etqPAO6temBmRwB7Ao8mmFFEUnLkkVBREU56z5oVlnP95S9h+/a0k0ltkiwWPYC3Mx5XRttqMLNeQB/giehxCfAT4IoE84lIytq0gauugtdfh0GD4NvfhsGDYfHitJNJdUkWC6tlW11Dc84Bprt71dyV3wZmufvbdbQPL2B2sZlVmFnF6tWrmxBVRNL0iU+EWWzvuScUimHDdIltoUmyWFQCPTMe7wPU1St5DhldUMCngZFm9n/AzcBXzezG
6ju5+1R3L3f38rKystykFpFUmIX5pe6+G5YsgTvvTDuRZEqyWLwI9DWzPmbWllAQalz3YGb7A52B/67w6+5fdvd93b03cDlwj7vXuJpKRIrPaafBZz4D48aF5VylMCRWLNz9Y2AkMBtYBNzn7gvMbLyZDctoOgKY5q7ZY0QkHGHcfDOsXg033ZR2GqlixfIZXV5e7hUVFWnHEJEcOe88eOCB0CXVs2f29tI4ZjbP3cuztdMIbhEpSBMnhtlqr7su7SQCKhYiUqB69YJRo8IVUi+/nHYaUbEQkYI1enQY5X3FFVoTI20qFiJSsHbfHcaOhccfh7/+Ne00LVusE9xmtgdwDNAd2AS8DlS4e8EMzNcJbpHitGULHHQQtG0Lr74KrVunnai45OQEt5kNMbPZwF8IEwLuTZgUcAzwmpndYGa75iKwiEht2rYN80ctXBgG7Ek66j2yMLPJwK3uvqKW51oDpwGt3P2B5CLGoyMLkeLlDsceC8uXw9KlsMsuaScqHjk5snD3K2orFNFzH7v7g4VQKESkuJnBT34C//pXGLAn+ZetG+q8aAbYup7/hJkdm/tYIiI7O+ooOOssmDwZ3n037TQtT7ZTRV2Bl81sHjAPWA2UAp8EPgOsATRnk4jkxY9/HJZmHTtWEw3mW7ZuqJ8DAwkzwpYBn40evwN8xd2/6O5LE08pIkKYynzkSLjrrrAGhuSP5oYSkWbl/fdD0fj0p8MKe9I0cU9wx7pi2czKgIuA3pn7uPvXGxtQRKQxunSBMWPg8sthzhw48cS0E7UMcUdwPwTsBjxGGHNRdRMRybuRI6F37zANyLZtWZtLDsQdC9nB3a9KNImISEzt2oWT3SNGwO9+B+efn3ai4hf3yOLPZvb5RJOIiDTA2WfDoEFw7bWwcWPaaYpf3GIxilAwNpnZv83sQzPTgocikpqqFfXeeQd+9rO00xS/WMXC3Tu5e4m7t3f3XaPHmhNKRFI1eDCccUboklq1Ku00xS32FOVm1tnMBpnZcVW3JIOJiMRx002weTPccEPaSYpbrGJhZhcCTwOzgRuif8clF0tEJJ5+/eCb34SpU+GNN9JOU7wacs7iU8A/3X0IcDhh6g8RkdRdfz106ABX6ZrNxMQtFpvdfTOAmbVz9zeA/ZOLJSISX1kZXHMNzJwJTz2VdpriFLdYVJrZ7sCDwBwzewhYmVwsEZGGGTUKevYMI7u3F8wansUj7tVQZ7r7OncfB1wH/AY4I8lgIiIN0b49TJwIFRUwbVraaYpPQ66GOtbMvubuTwHPAT2SiyUi0nBf/jIcfjiMHh2ukJLciXs11PXAVcDoaFMb4HdJhRIRaYySkjBQb8UKuPXWtNMUl7hHFmcCw4D/ALj7SqBTUqFERBrrhBPg1FNDl9TatWmnKR5xi8UWDwtfOICZdUwukohI00yaBB9+CBMmpJ2keMQtFveZ2R3A7mZ2EWGq8qyLGprZUDNbbGbLzKzG8qtmdouZvRLdlpjZumj7ADN7zswWmNl8Mzu7Id+UiLRs/fvDhRfCbbfBUq3lmROxV8ozsxOBkwADZrv7nCztWwFLgBOBSuBFYIS7L6yj/aXA4e7+dTPrB7i7LzWz7oT1vw9093V1vZ5WyhORTP/6F3zykzB0KEyfnnaawhV3pbzYV0NFxWEC8CNgnpl1ybLLIGCZuy939y3ANOD0etqPIKz1jbsvqVrbOzo/8h5hDXARkVj22guuvBIeeAD+8Y+00zR/ca+G+qaZrQLmAxWEv/Sz/RnfA3g743EldVxua2a9gD7AE7U8NwhoC7wZJ6uISJUf/AD23jv8G7MTReoQ98jicuAgd+/t7vu5ex933y/LPlbLtrr+u84Bprv7TgskmtnewG+Br7l7jTGZZnaxmVWYWcXq1ZqqSkR21rEj/PCHMHeuuqKaKm6xeBNo6FpUlUDPjMf7UPcUIecQdUFVMbNdCet8j3H3ubXt5O5T3b3c3cvLytRLJSI1nX8+HHIIXH01
bNmSdprmK26xGA38w8zuMLMpVbcs+7wI9DWzPmbWllAQZlZvZGb7A50Jo8KrtrUF/gTc4+73x8woIlJDq1YweTIsXw633552muYrbrG4g3A+YS7hfEXVrU7u/jEwkrD2xSLgPndfYGbjzWxYRtMRwDTf+bKs4cBxwAUZl9YOiJlVRGQnJ58MJ54I48fDBx+knaZ5inXprJn9w92PzkOeRtOlsyJSn1dfDfNG/eAH4UhDglxfOvtkdDJ5bzPrUnVrYkYRkbw57DC44AKYMgXeeivtNM1P3GJxLtF5C3Z0QenPeBFpViZMCOcwrr027STNT9z1LPrUcst26ayISEHp0SN0Q917L7zwQtppmpfYI7irM7O9chlERCQfrrwS9tgjrKingXrxNbpYEFbLExFpVjp1ghtugGeeCWt2SzyNLhbufmoug4iI5MuFF8IBB4SjjK1b007TPDR4WdXofpmZ9UkulohIclq3DmteLFkCU6emnaZ50LKqItIinXYaHH88jBsH69ennabwaVlVEWmRzMJ63WvWwE03pZ2m8GlZVRFpsY44Ar78ZbjlFnj77eztW7JEl1UVESl0EyeGS2jHjEk7SWGLOyjvZmA68ACwPzDW3W9NMpiISD706gWXXQa//S28/HLaaQpX1okEo7W0Z7v75/ITqXE0kaCINNb69fCJT4T5ox57LJzPaClyNpFgtHrdRjPbLSfJREQKzG67wfXXwxNPwCOPpJ2mMMWdovw+4ChgDtEVUQDu/t3kojWMjixEpCm2bIGDD4Y2bcJ05q1bp50oP3I9RflfgOuAp4m5+JGISHPSti3ceCMsXAh33512msIT68gC/rvUab/o4WJ3L6hB8jqyEJGmcofBg2HZsnDbZZe0EyUvp0cWZnY8sBS4DbgdWGJmxzUpoYhIgakaqLdqlVbTqy5uN9RPgJPc/TPufhxwMnBLcrFERNJx1FEwfHgoGitXpp2mcMQtFm3cfXHVA3dfQpgfSkSk6Pz4x2E22rFj005SOOIWiwoz+42ZHR/d7kQnuEWkSO23H4wcGU50v/Za2mkKQ9xi8S1gAfBdYBSwELgkqVAiImkbMwZ23TWseSHxi0Vr4Ofu/gV3PxOYArRKLpaISLq6dIHrroO//hXmzEk7TfriFovHgfYZj9sTJhMUESla3/kO9OkT1uveti3tNOmKWyxK3X1D1YPofodkIomIFIZ27cLJ7vnzw0SDLVncYvEfMxtY9cDMyoFNyUQSESkcw4fDoEFw7bWwcWPaadITt1iMAu43s2fM7GlgGjAyuVgiIoWhaqDeypVhkaSWKm6x6AMcTrgqag6wmGjVPBGRYjd4MJx5Zpg7atWqtNOkI26xuM7d/w3sDpwITAV+mW0nMxtqZovNbJmZXV3L87eY2SvRbYmZrct47nwzWxrdzo+ZU0QkETfeCJs3w7hxaSdJR9xiUXUdwKnAr9z9IaBtfTtEiybdBpwC9AdGmFn/zDbu/j13H+DuA4BbgRnRvl2A64EjgUHA9WbWOWZWEZGc69cPLrkE7rwTFi1KO03+xS0W70RrcA8HZplZuxj7DgKWuftyd99COM9xej3tRwD3RvdPBua4+/vu/gGh62tozKwiIokYOxY6doSrrko7Sf7FLRbDgdnAUHdfB3QBrsiyTw/g7YzHldG2GsysF+G8yBMN3VdEJF/KymD0aHj4Yfjb39JOk1+xioW7b3T3Ge6+NHr8rrs/mmW32laxreuk+DnA9GgJ19j7mtnFZlZhZhWrV6/OEkdEpOlGjYKePcNAve3b006TP3GPLBqjEuiZ8XgfoK4Jf89hRxdU7H3dfaq7l7t7eVlZWRPjiohk1749/OhHMG8e3Htv9vbFIsli8SLQ18z6RKvsnQPMrN7IzPYHOgPPZWyeDZxkZp2jE9snRdtERFJ37rkwcCBcc024QqolSKxYuPvHhIF7s4FFwH3uvsDMxpvZsIymI4BpnrG+q7u/D0wgFJwXgfHRNhGR1JWU
hJX0VqyAKVPSTpMfsdfgLnRag1tE8u200+CZZ+DNN6Fbt7TTNE5O1+AWEZGaJk2CDRtgwoS0kyRPxUJEpJH694eLLoLbb4elS9NOkywVCxGRJhg3LkxlfnWNCY2Ki4qFiEgT7LVXGNE9Ywb8/e9pp0mOioWISBN9//vQvXsYqFck1wzVoGIhItJEHTuGk9xz58L06WmnSYaKhYhIDpx/PhxySDh38dFHaafJPRULEZEcaNUqDNRbvjxcHVVsVCxERHLk5JPhpJNCl9QHH6SdJrdULEREcmjyZFi3DiZOTDtJbqlYiIjk0KGHwgUXwK23wltvpZ0md1QsRERybMKEcA7jmmvSTpI7KhYiIjnWo0cYczFtGrzwQtppckPFQkQkAVdcAXvsUTwD9VQsREQS0KkTjB8fpjB/6KG00zSdioWISEK+8Q048EC48krYujXtNE2jYiEikpDWrcOaF0uXwtSpaadpGhULEZEEnXoqDBkSpjJfvz7tNI2nYiEikiCzMFBvzRq48ca00zSeioWISMKOOALOOw9+9jNYsSLtNI2jYiEikgcTJ4ZLaMeMSTtJ46hYiIjkwb77wmWXwW9/Cy+9lHaahlOxEBHJk9GjoWvX5jlQT8VCRCRPdtstXBX15JMwa1baaRpGxUJEJI+++U3o2zcM1Pv447TTxKdiISKSR23awE03wcKFcNddaaeJT8VCRCTPzjgDjjkGxo6FDz9MO008KhYiInlmBj/5CaxaBTffnHaaeBItFmY21MwWm9kyM7u6jjbDzWyhmS0wsz9kbJ8UbVtkZlPMzJLMKiKST0ceCWefHUZ3v/NO2mmyS6xYmFkr4DbgFKA/MMLM+ldr0xcYDRzj7gcBl0XbjwaOAQ4FDgY+BXwmqawiImn40Y/CSe6xY9NOkl2SRxaDgGXuvtzdtwDTgNOrtbkIuM3dPwBw9/ei7Q6UAm2BdkAbYFWCWUVE8m6//eDSS+Huu2H+/LTT1C/JYtEDeDvjcWW0LVM/oJ+Z/d3M5prZUAB3fw54Eng3us1290UJZhURScW118Luu4dLaQtZksWitnMM1ccstgb6AscDI4Bfm9nuZvZJ4EBgH0KBOcHMjqvxAmYXm1mFmVWsXr06p+FFRPKhS5cwX9Ts2fDoo2mnqVuSxaIS6JnxeB9gZS1tHnL3re7+FrCYUDzOBOa6+wZ33wA8AhxV/QXcfaq7l7t7eVlZWSLfhIhI0r7zHejTJ6zbvW1b2mlql2SxeBHoa2Z9zKwtcA4ws1qbB4EhAGbWjdAttRxYAXzGzFqbWRvCyW11Q4lIUWrXLqx1MX8+3HNP2mlql1ixcPePgZHAbMIH/X3uvsDMxpvZsKjZbGCtmS0knKO4wt3XAtOBN4HXgFeBV9394aSyioik7ayzwuW0Y8bAxo1pp6nJvLlNfViH8vJyr6ioSDuGiEijPfssDB4MEybkb90LM5vn7uXZ2mkEt4hIgTj2WDjzzDB31KoCGyygYiEiUkBuvBE2bw5TmRcSFQsRkQLSrx9861tw551hZtpCoWIhIlJgxo6Fjh3hqqvSTrKDioWISIHp1g2uuQb+/Oewql4hULEQESlA3/0u7LtvWK97+/a006hYiIgUpPbtYeJEeOkluPfetNOoWIiIFKxzz4WBA0OX1KZN6WZRsRARKVAlJWElvRUrYMqUlLOk+/IiIlKfIUPgtNPCQklr1qSXQ8VCRKTATZoE//kPjB+fXgYVCxGRAnfggXDhhfDLX8KSJelkULEQEWkGxo2D0lIYPTqd11exEBFpBvbaK4zonjEjzE6bbyoWIiLNxPe/D927h4F6+V5dQsVCRKSZ6NABfvhDeP55uP/+/L62ioWISDPy1a/CoYfC1VfDRx/l73VVLEREmpFWrWDyZHjrLbj99vy9roqFiEgzc9JJcPLJYfnV99/Pz2uqWIiINEOTJsG6dWGywXxQsRARaYYOPRS+9jX4xS9g+fLkX0/FQkSkmRo/Hlq3DrPSJq11
8i8hIiJJ6NEDxoyBjRvDuAuz5F5LxUJEpBnL1/Qf6oYSEZGsVCxERCQrFQsREclKxUJERLJKtFiY2VAzW2xmy8zs6jraDDezhWa2wMz+kLF9XzN71MwWRc/3TjKriIjULbGrocysFXAbcCJQCbxoZjPdfWFGm77AaOAYd//AzPbI+BL3ABPdfY6Z7QJsTyqriIjUL8kji0HAMndf7u5bgGnA6dXaXATc5u4fALj7ewBm1h9o7e5zou0b3H1jgllFRKQeSRaLHsDbGY8ro22Z+gH9zOzvZjbXzIZmbF9nZjPM7GUzmxwdqYiISAqSHJRX21jC6ms7tQb6AscD+wDPmNnB0fbBwOHACuCPwAXAb3Z6AbOLgYujhxvMbHET8nYD1jRh/6QoV8MoV8MoV8MUY65ecRolWSwqgZ4Zj/cBVtbSZq67bwXeij7s+0bbX3b35QBm9iBwFNWKhbtPBabmIqyZVbh7eS6+Vi4pV8MoV8MoV8O05FxJdkO9CPQ1sz5m1hY4B5hZrc2DwBAAM+tG6H5aHu3b2czKonYnAAsREZFUJFYs3P1jYCQwG1gE3OfuC8xsvJkNi5rNBtaa2ULgSeAKd1/r7tuAy4HHzew1QpfWnUllFRGR+iU6kaC7zwJmVds2NuO+A9+PbtX3nQMcmmS+anLSnZUA5WoY5WoY5WqYFpvLwue1iIhI3TTdh4iIZNVii4WZnRVNMbLdzOq8iiDOlCU5ztXFzOaY2dLo3851tNtmZq9Et+oXDuQyT73fv5m1M7M/Rs8/n49pWWJkusDMVme8PxcmnSl63bvM7D0ze72O583MpkS555vZwALJdbyZrc94v8bW1i6BXD3N7MloSp8FZjaqljZ5f89i5sr7e2ZmpWb2gpm9GuW6oZY2yf0+unuLvAEHAvsDfwPK62jTCngT2A9oC7wK9E841yTg6uj+1cBNdbTbkIf3KOv3D3wb+FV0/xzgjwWQ6QLgFyn8TB0HDARer+P5zwOPEC7YOAp4vkByHQ/8OYX3a29gYHS/E7Cklv/LvL9nMXPl/T2L3oNdovttgOeBo6q1Sez3scUeWbj7InfPNogvzpQluXY68L/R/f8Fzkj49eoT5/vPzDsd+KxZkos7pvJ/Eou7Pw28X08DOkKJAAAE9ElEQVST04F7PJgL7G5mexdArlS4+7vu/lJ0/0PCVZPVZ3nI+3sWM1feRe/Bhuhhm+hW/aRzYr+PLbZYxBRnypJc29Pd34XwQwvsUUe7UjOriKZJSaqgxPn+/9vGw+XS64GuCeWJmwngi1G3xXQz61nL82lI4+cprk9H3RuPmNlB+X7xqLvkcMJfy5lSfc/qyQUpvGdm1srMXgHeA+a4e53vV65/H4t6DW4zewzYq5anrnX3h+J8iVq2NfnysfpyNeDL7OvuK81sP+AJM3vN3d9sarZq4nz/ibxH9Yjzeg8D97r7R2Z2CeEvrRMSzBRXvt+ruF4Cern7BjP7PGGwbN98vbiFWaUfAC5z939Xf7qWXfLynmXJlcp75mEM2gAz2x34k5kd7O6Z56ISe7+Kuli4++ea+CXiTFnSYPXlMrNVZra3u78bHW6/V8fXWBn9u9zM/kb46yfXxSLulC09gUozaw3sRrJdHlkzufvajId3AjclmKchEvl5aqrMD0J3n2Vmt5tZN3dPfA4kM2tD+ED+vbvPqKVJKu9ZtlxpvmfRa66Lfu+HApnFIrHfR3VD1S/OlCW5NhM4P7p/PlDjCMjMOptZu+h+N+AYkpkOJc73n5n3S8ATHp1dS0jWTNX6tIcR+pwLwUzgq9EVPkcB66u6HNNkZntV9Wub2SDC58La+vfKyesaYb63Re7+0zqa5f09i5MrjffMzMqiIwrMrD3wOeCNas2S+33M59n8QroBZxKq8EfAKmB2tL07MCuj3ecJV0O8Sei+SjpXV+BxYGn0b5doeznw6+j+0cBrhCuBXgO+kWCeGt8/MB4YFt0vBe4HlgEvAPvl4T3KlunH
wILo/XkSOCBPP1P3Au8CW6OfrW8AlwCXRM8bYUGwN6P/t1qvwksh18iM92sucHSech1L6CKZD7wS3T6f9nsWM1fe3zPCjBYvR7leB8ZG2/Py+6gR3CIikpW6oUREJCsVCxERyUrFQkREslKxEBGRrFQsREQkKxULkQYwsw3ZW9W7//Ro1D1mtouZ3WFmb0aziD5tZkeaWdvoflEPmpXmRcVCJE+i+YNaufvyaNOvCaNr+7r7QYTZcrt5mCDxceDsVIKK1ELFQqQRohHFk83sdTN7zczOjraXRFM/LDCzP5vZLDP7UrTbl4lG5JvZJ4AjgTHuvh3C1C3u/peo7YNRe5GCoMNckcb5AjAAOAzoBrxoZk8Tpl7pDRxCmDF4EXBXtM8xhNHUAAcBr3iYGK42rwOfSiS5SCPoyEKkcY4lzGy7zd1XAU8RPtyPBe539+3u/i/CdCNV9gZWx/niURHZYmadcpxbpFFULEQap64FZepbaGYTYe4eCPMKHWZm9f0OtgM2NyKbSM6pWIg0ztPA2dFiNGWEpUtfAJ4lLLxUYmZ7EpbfrLII+CSAh7VHKoAbMmYv7Wtmp0f3uwKr3X1rvr4hkfqoWIg0zp8Is3++CjwBXBl1Oz1AmNn1deAOwgpr66N9/sLOxeNCwiJYy8zsNcLaG1VrNQwBZiX7LYjEp1lnRXLMzHbxsIJaV8LRxjHu/q9oDYIno8d1ndiu+hozgNGefZ14kbzQ1VAiuffnaJGatsCE6IgDd99kZtcT1kleUdfO0aJOD6pQSCHRkYWIiGSlcxYiIpKVioWIiGSlYiEiIlmpWIiISFYqFiIikpWKhYiIZPX/SpnfAiTm03QAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Hyper-parameter grid for the linear SVM.\n",
    "# Training is slow, so only C is tuned here; the penalty is fixed to 'l2'.\n",
    "C_s = np.logspace(-1, 3, 5)  # logspace(a, b, N): N log-spaced values from 10**a to 10**b\n",
    "penalty_s = ['l2']\n",
    "\n",
    "# Results are appended C-major: entry i * len(penalty_s) + j belongs to (C_s[i], penalty_s[j]).\n",
    "score_s = []\n",
    "for i, oneC in enumerate(C_s):\n",
    "    for j, penalty in enumerate(penalty_s):\n",
    "        tmp = fit_grid_point_Linear2(penalty, oneC, X_train, y_train)\n",
    "        score_s.append(tmp)\n",
    "\n",
    "x_axis = np.log10(C_s)\n",
    "for j, penalty in enumerate(penalty_s):\n",
    "    # Plot only this penalty's slice of the results, and label the curve so that\n",
    "    # plt.legend() below has an entry to show (it was empty before).\n",
    "    plt.plot(x_axis, np.array(score_s)[j::len(penalty_s)], 'b-', label='penalty = ' + penalty)\n",
    "\n",
    "plt.legend()\n",
    "plt.xlabel('log(C)')\n",
    "plt.ylabel('score.mean()')\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.svm import SVC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "def fit_grid_point_RBF(C, gamma, X_train, y_train):\n",
    "    \"\"\"Return the mean 5-fold cross-validation score of an RBF-kernel SVC.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    C : regularization parameter passed to SVC.\n",
    "    gamma : RBF kernel coefficient passed to SVC.\n",
    "    X_train, y_train : features and labels handed to cross_val_score.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The mean of the five per-fold scores (the value that is also printed).\n",
    "    \"\"\"\n",
    "    # cross_val_score clones the estimator and refits it on every fold, so a\n",
    "    # separate fit on the full training set beforehand would be wasted work.\n",
    "    model = SVC(C=C, kernel='rbf', gamma=gamma)\n",
    "\n",
    "    # The score is a cross-validation estimate computed on the training data,\n",
    "    # not a score on a separate validation set.\n",
    "    fold_scores = cross_val_score(model, X_train, y_train, cv=5)\n",
    "    mean_score = fold_scores.mean()\n",
    "\n",
    "    print(\"C= {} and gamma = {}: score.mean()= {} \" .format(C, gamma, mean_score))\n",
    "    return mean_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Grid for the RBF-kernel SVM search: one row per C value, one column per gamma value.\n",
    "C_s = np.logspace(-1, 3, 5)      # 0.1, 1, 10, 100, 1000\n",
    "gamma_s = np.logspace(-1, 1, 3)  # 0.1, 1, 10\n",
    "\n",
    "# Plain ndarray (np.matrix is no longer recommended by NumPy), sized from the\n",
    "# grids instead of a hard-coded (5, 3) so the two cannot drift apart.\n",
    "score_s = np.zeros((len(C_s), len(gamma_s)), dtype=float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "C= 0.1 and gamma = 0.1: score.mean()= 0.7617434852728969 \n",
      "C= 0.1 and gamma = 1.0: score.mean()= 0.6510482981071216 \n",
      "C= 0.1 and gamma = 10.0: score.mean()= 0.6510482981071216 \n"
     ]
    }
   ],
   "source": [
    "# Fill row 0 of score_s: C = 0.1, one column per gamma value.\n",
    "oneC = 0.1\n",
    "\n",
    "for j in range(len(gamma_s)):\n",
    "    gamma = gamma_s[j]\n",
    "    score_s[0, j] = fit_grid_point_RBF(C=oneC, gamma=gamma, X_train=X_train, y_train=y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "C= 1 and gamma = 0.1: score.mean()= 0.7565656565656566 \n",
      "C= 1 and gamma = 1.0: score.mean()= 0.7058144469909176 \n",
      "C= 1 and gamma = 10.0: score.mean()= 0.6510482981071216 \n"
     ]
    }
   ],
   "source": [
    "# Fill row 1 of score_s: C = 1, one column per gamma value.\n",
    "oneC = 1\n",
    "\n",
    "for j in range(len(gamma_s)):\n",
    "    gamma = gamma_s[j]\n",
    "    score_s[1, j] = fit_grid_point_RBF(C=oneC, gamma=gamma, X_train=X_train, y_train=y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "C= 10 and gamma = 0.1: score.mean()= 0.7422884305237246 \n",
      "C= 10 and gamma = 1.0: score.mean()= 0.6901706137000255 \n",
      "C= 10 and gamma = 10.0: score.mean()= 0.649741108564638 \n"
     ]
    }
   ],
   "source": [
    "# Fill row 2 of score_s: C = 10, one column per gamma value.\n",
    "oneC = 10\n",
    "\n",
    "for j in range(len(gamma_s)):\n",
    "    gamma = gamma_s[j]\n",
    "    score_s[2, j] = fit_grid_point_RBF(C=oneC, gamma=gamma, X_train=X_train, y_train=y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "C= 100 and gamma = 0.1: score.mean()= 0.7136236312706901 \n",
      "C= 100 and gamma = 1.0: score.mean()= 0.690162125456243 \n",
      "C= 100 and gamma = 10.0: score.mean()= 0.649741108564638 \n"
     ]
    }
   ],
   "source": [
    "# Fill row 3 of score_s: C = 100, one column per gamma value.\n",
    "oneC = 100\n",
    "\n",
    "for j in range(len(gamma_s)):\n",
    "    gamma = gamma_s[j]\n",
    "    score_s[3, j] = fit_grid_point_RBF(C=oneC, gamma=gamma, X_train=X_train, y_train=y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "C= 1000 and gamma = 0.1: score.mean()= 0.6809608691961633 \n",
      "C= 1000 and gamma = 1.0: score.mean()= 0.690162125456243 \n",
      "C= 1000 and gamma = 10.0: score.mean()= 0.649741108564638 \n"
     ]
    }
   ],
   "source": [
    "# Fill row 4 of score_s: C = 1000, one column per gamma value.\n",
    "oneC = 1000\n",
    "\n",
    "for j in range(len(gamma_s)):\n",
    "    gamma = gamma_s[j]\n",
    "    score_s[4, j] = fit_grid_point_RBF(C=oneC, gamma=gamma, X_train=X_train, y_train=y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEKCAYAAADjDHn2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xd4VGX2wPHvSQ8JhBYgJEAIPQQIEIoBFJCOgAUpiiIqiIC6urCW1d0Vy09RFnVRV1cUBAUUaVJEkCYISOgQOlKSgER6S39/f9whhpCQNpNJOZ/nycPMnffee+aSzJn3fe89V4wxKKWUUrfi4uwAlFJKFX2aLJRSSuVIk4VSSqkcabJQSimVI00WSimlcqTJQimlVI40WSillMqRJgullFI50mShlFIqR27ODsBeKleubIKDg50dhlJKFStbtmz5wxjjn1O7EpMsgoODiYqKcnYYSilVrIjIsdy002EopZRSOdJkoZRSKkeaLJRSSuWoxMxZKFWSJCcnExMTQ0JCgrNDUSWEl5cXQUFBuLu752t9TRZKFUExMTGULVuW4OBgRMTZ4ahizhjDmTNniImJoXbt2vnahg5DKVUEJSQkUKlSJU0Uyi5EhEqVKhWop6rJQqkiShOFsqeC/j6V+mGolNQ03vlxP0HlvQms4E318t4ElvemrFf+xvWUUqokcmiyEJEewPuAK/CZMeatTK9PAjrZnpYBqhhjytteqwl8BtQADNDLGHPU3jH+cTmJL9YdJSk17Ybl5bzcCKxQhsDyXgTaEklg+TJUL+9FYAVv/H099ZufKhXatGlDYmIiZ8+e5dq1awQGBgIwf/588lI1Ye7cuYSGhtKwYcM87b99+/ZMnjyZ8PDwPK133bvvvkv16tV54IEH8rV+Ybj//vt5++23CQkJuem106dPM2DAADZv3szw4cN57733stzGmTNnGDBgAMePHyckJIRvvvkGPz8/u8XosGQhIq7Ah0BXIAbYLCILjTHR19sYY57N0P4poHmGTXwJvGGMWS4ivsCNn+Z2Us3Pi32v9eCPy4nEnL9G7LlrxJ2/Rqztccy5a2z67SyXElJuWM/DzYXA8t5W8ihvJRKrZ+JFUPkyVPPzwsNNR/lU8bdp0yYApk6dSlRUFJMnT87XdubOnYuLi0uek0VBJCcnM336dLZu3Vpo+8yPkSNH8s477/Dxxx/f9FqZMmV444032LZtG4cOHcp2G2+88QY9e/Zk7NixvP7660yYMIE33njDbjE6smfRGjhkjDkCICKzgH5AdDbtBwP/tLUNBdyMMcsBjDGXHRgnLi5ClXJeVCnnRYuaFbJsczEhmdhztmRywZZIbAll9f54Tl9KvKG9CFQp62nrlZSxJRSv9B5KYAVvfD1L/SigKuaWLl3K+PHjSUxMpF69enz++ef4+Pgwbtw4Fi9ejJubGz179uSuu+5iyZIlrF+/nn/961957pVcN2PGDN5++22MMfTt25c333wTgE8++YSJEydSvXp16tati6+vL++99x7Lly+nVatWuLq6ArBx40ZGjBiBr68v7dq1Y/ny5Wzfvp3Dhw/zyCOPcPnyZVxcXPjoo49o06YNK1as4I033qBSpUrs2LGDgQMHUr9+ff7zn/+QmJjIwoULCQ4OZsiQIfj5+REdHc3x48f54osvmDJlCps2baJdu3ZMmTIFgBEjRrB161auXbvGwIED+cc//gFAx44defzxx0lNTU2P9brrse7du/eWx2bBggVs3LgRgKFDh9KjR49ikywCgRMZnscAbbJqKCK1gNrAStui+sB5EZlrW74CeMEYk5ppvRHACICaNWvaNfjMynm5Uy7AnUYB5bJ8PTEllZPnE6weiS2JxJ63eik7Y87zw+6TJKeaTNt0S08kQRWu91LK2BKKN5V9PXSoS/Hq93uIjrto122GVi/HP/s0LtA2Tp8+zVtvvcVPP/2U/u33
/fff57HHHmPJkiXs2bMHEeH8+fOUL1+eXr160b9/f+6+++587S8mJoaXX36ZqKgo/Pz86NKlC4sWLaJZs2a89dZbbN26FR8fHzp27Ejr1q0BWL9+PS1btkzfxrBhw5g2bRqtW7dm7Nix6csDAgJYvnw5Xl5e7Nu3j6FDh6b3qHbs2MHevXvx8/MjODiYUaNGsXnzZiZOnMjkyZN59913Abhw4QKrVq3iu+++o0+fPmzYsIGGDRvSokULdu/eTVhYGG+99RYVK1YkJSWFTp060b9/f0JDQ3F1dSU4OJjdu3fTrFmzfB2fM2fO4O9v1QMMDAzk5MmT+dpOdhyZLLL6lDNZLAMYBMzJkAzcgA5Yw1LHgdnAI8CUGzZmzKfApwARERHZbbtQeLq5ElzZh+DKPlm+npZmiL+ceEMiuT7kFXPuKpuOnOFSYtZDXdd/qqfPnVjJpWo5HepSzvPLL78QHR1NZGQkAElJSbRv356KFSvi4uLC8OHD6d27N3fddZdd9rdp0yY6d+5M5cqVAXjggQdYu3YtCQkJdO7cmQoVrFGB/v37c/z4cQBOnjxJ8+bW6PYff/xBUlJSeiJ54IEHWLFiBQCJiYmMGTOGHTt24ObmxuHDh9P326ZNG6pWrQpASEgI3bt3B6BJkyZs2LAhvV2fPn3Sl1evXp3Q0FAAQkNDOXr0KGFhYcycOZMpU6aQkpJCXFwc0dHR6e2qVKlCXFxcvpNFZvb+ounIZBGDNTl9XRAQl03bQcDoTOtuyzCENR9oS6ZkUZy4uAhVy3lR9RZDXReuJVvzJdeTSYZeysr9p4nPYqiralmv9ARyPZlkPLNLh7qKv4L2ABzFGEOPHj2YPn36Ta9FRUWxfPlyZs2axccff8yPP/6Y7XYyfoDfe++96UMzWe0vL8sBvL29068tuFW7iRMnUqNGDWbMmEFycjK+vr7pr3l6eqY/dnFxSX/u4uJCSkrKTe0ytsnY7uDBg7z//vv8+uuvlC9fniFDhtxw3UNCQgLe3t7MmTOH119/HbDmiXI7sV+pUiXi4+Px9/cnNjaWatWq5Wq93HLkJ8lmoJ6I1AZisRLCTacjiEgDoAKwIdO6FUTE3xgTD3QGSnz9cT9vd/y8sx/qSkhO5dSFhD8n323DXLHnrrH9xHmWZjHU5eftnp5IgmxJJeMpwjrUpfIrMjKSZ555hiNHjhASEsKVK1eIi4ujWrVqJCQkcNddd9GmTZv0b85ly5bl0qVLN23Hw8OD7du357i/tm3bMm7cOM6cOYOfnx+zZs1i7NixNGnShOeff57z58/j4+PD3LlziYiIAKBRo0bpk8L+/v64u7sTFRVFREQEs2bNSt/2hQsXqFu3LiLCtGnTbplY8uvixYuULVuWcuXKcfLkSZYtW0aPHj3SXz948CCNGzfG39+f/v3753n7ffv2Zdq0aYwdO5Zp06bRr18/e4bvuGRhjEkRkTHAMqxTZz83xuwRkfFAlDFmoa3pYGCWyfC/Y4xJFZGxwE9ifZJtAf7nqFiLCy/33A11xWQa5oo9f40TZ6+y8cgZLmca6vJMP6vLO8Mpwn8ml2p+Xri76lCXulnVqlWZMmUKAwcOJCkpCYA333wTb29v7r33XhITE0lLS+Pf//43AIMHD+aJJ55g4sSJ+ZrgDgoKYvz48XTs2BFjDH369KF3794AjBs3jtatWxMYGEjjxo3TTxnt1asXjz32WPo2Pv/8c4YNG0bZsmW5/fbb09uNGTOG/v37M3PmTLp06XJDz8BeWrRoQWhoKGFhYYSEhNCuXbv01+Li4vDz80ufc8jqvV+9epXk5GTmzJnDTz/9RIMGDRg2bBjPPPMM4eHhvPTSSwwYMIBPPvmE2rVrM3v2bLvGL47IoM4QERFh9OZHt2aM4WJCSoY5k6vEXUi44cyuPy7fONTlIlC1nNdNyaRN7YrUq1rWSe+k5Nu7dy+NGjVydhjFxuXLl/H19SU5OZl+/frx5JNPps8h9O3bl/fe
e4+QkJD0dmCdanr27FkmTpzozNABeOedd6hSpQpDhw516H6y+r0SkS3GmIic1tUB7VJERNKHukKrZz/UddKWQGLPXyX2/J+Pt504x5JdJ0lJs75g1KviS68mAfRqEkD9qr46nKWc5pVXXmH16tUkJCTQo0ePGybV3377beLi4ggJCWHhwoVMmDCBlJQUgoODmTp1qvOCzqBSpUoMGTLE2WHckvYsVJ6kphnizl9j1f7TLN55kl+PnsUYqOPvQ+8mAfRqGkCDqmU1cRSQ9iyUI2jPQhUaVxehRsUyPHxbMA/fFszpSwks2/M7S3aeZPKqQ3yw8hAh/j70CrN6HI0CNHEoVRJoslAFUqWsFw+1rcVDbWsRfymRZXtOsXT3ST5afYjJqw5Ru7IPPcOq0atJAI2rl9PEoVQxpclC2Y1/WU+GtK3FkLa1OHM50epx7DrJJ2uP8NHqw9SqVMaa4wgLICxQE4dSxYkmC+UQlXw9eaBNTR5oU5OzV5L4cc8pFu86yadrj/Dx6sPUrFiGnk2q0btJAE0C/TRxKFXE6Qn0yuEq+ngwqHVNpj/Whqi/d+Ht+5pQu7IPU37+jb6T19NhwireXLKX7SfOO+RiKFUwbdq0ITw8nJo1a+Lv7094eDjh4eEcPXo0T9uZO3cu+/bty/P+27dvn6uL9rLz7rvv8vXXX+d7/cJw//33c+TIkfTnmzdvJiwsjLp16/Lss89muY4xhlGjRlG3bl2aNWtWoGOUG9qzUIWqgo8HA1vVZGCrmpy/msSP0dZQ1Rfrf+PTtUcILO9NrybV6NkkgOY1ymuPowjQEuWOl7lE+ciRI/niiy+IiIige/fuLF++nK5du96wzvfff8+JEyc4dOgQ69atY/To0axfv95hMWrPQjlN+TIeDIiowdRhrYn6e1fevb8ZDaqVZeovR7n3o19o99ZKXlsUzZZj50hL0x5HUbR06VJuu+02WrRowcCBA7ly5QpgXVEdGhpK06ZNef755/n5559ZsmQJzz77bL56JdfNmDGDJk2aEBYWxksvvZS+/JNPPqF+/frppb7/8pe/AGRZorxp06ZERkYybty49LpLhw8fpkOHDjRv3pyWLVumJ8gVK1akV4etV68eL7/8Ml9++SWtWrWiadOm6e9jyJAhjB49mk6dOlGnTh3Wrl3L0KFDadiw4Q1XkI8YMYKIiAgaN27M+PHj05d37NiRH374gdTUVE6cOEFCQgKtWrVCRHjooYeYP3/+TcdiwYIFPPzww4DV+zp16hTx8fH5Oq65oT0LVST4lXGnf8sg+rcM4sK1ZFZE/87S3SeZvuEYU9b9RoCfFz3CrDmOFjUr4OJSinocS1+AU7vsu81qTaDnWzm3uwUtUe6YEuXXrl2jRo0/a7AGBQURGxt70/GIjY3Nsl12JUMKSpOFKnL8vN25r2UQ97UM4mJCMj/t/Z3FO0/x1abjfLH+KNXKWYmjV5MAImqVssRRhGiJcseUKC9fvvxN7z2r4dis5vccOWyryUIVaeW83LmneRD3NA/iUkIyK/dZV45//etxpv5ylCplPdOv44gIrohrSUwcBewBOIqWKHdMifKgoCBOnPjzvnExMTFUr179ppivt2vbtu0t29mLzlmoYqOslzv9wgP59OEItr7SlfcHhdOiZgVmbT7BwE830vb/fuKV+bvZcPgMqTrH4XCRkZGsWbMm/SyeK1eucPDgQS5dusTFixe56667mDRpEtu2bQNyLlG+ffv2bBMFWCXKV61axZkzZ0hJSWHWrFnccccdtGnThlWrVnH+/HmSk5OZO3du+jrZlSgHbipRHhAQUOglyjO6XqK8Ro0aeHp6snnzZowxTJ8+Pcty43379uXLL78EYN26dVStWtVhQ1CgPQtVTPl6utEvPJB+4YFcSUxh5b7TLNl1km+3nGD6xmNU9vWge2NrjqN17Yq4aZl1u9MS5XmTlxLlH3/8MY888kj6fUGunwn14Ycf4unpyeOPP06fPn1Y
unQpderUwcfHh2nTptk95oy0kKAqUa4mpbBqXzxLdp1k5b7TXEtOpZKPB91tk+Ntikni0EKCeaMlynNHCwkqZVPGw43eTQPo3TSAq0kprN5vJY7522L5etNxKvp40L1xVXo1CeC2kErFInGonGmJcsfTnoUqFa4lpbLmwGkW7zrFT3t/52pSKhXKuNMttBq9mgYQWadSkbojoPYslCNoz0KpHHh7uNIjLIAeYQEkJKey5oDV41i0M47ZUScoX8adbqFWjyOyTmU83IpO4lCqKNBkoUodL3dXujeuRvfG1UhITmXtgXiW7j7F0l2n+CYqBj9vd7qGVqV3kwDa1dXEoRRoslClnJe7K90aV6Nb42okpqTy84E/WLLrJMt2n2LOlhjKermlJ4729Srj6ebq7JCVcgpNFkrZeLq50iW0Kl1Cq5KYksr6Q3+weOcpfow+xdytsZT1dKOLbaiqQ73KeLlr4lClh/avlcqCp5srnRtWZeKAZmx5uStfPNKK7mHV+Gnv7wz/MoqI11fwl1nb+HHPKRKSU50drkNpiXLHy1yi/IUXXiAoKCjL0h8Zvf7669StW5eGDRumly5xFO1ZKJUDDzcXOjWsQqeGVUi6pwm/HLaGqn6M/p352+Pw8XDlzkZWj6NjA/8S1+PQEuWOl7lEeb9+/RgzZgxhYWHZrrNz507mzp1LdHQ0J06coEePHuzfvx8XF8f0AbRnoVQeeLi50LFBFSb0b8bmv3fhy0db06dZdX4+GM/IGVto+dpynpq5jaW7TnItqWT3OEBLlDuiRDnAbbfdRrVq1W55LBYsWMDgwYPx8PCgTp061KxZky1btuTruOaG9iyUyid3Vxdur+/P7fX9ee3uMDYeOWNNju/5ne93xFHGw5VODavQu0kAnRtWyXeP4+1f32bf2bwP39xKw4oNeb718wXahpYod0yJ8mbNmuXqeMTGxtKxY8f059dLlLdq1SpfxzcnmiyUsgN3Vxc61POnQz1/XuuXxqbfzrLYdlbV4p0nqV3Zhwn9m9IquKKzQ7UbLVHumBLluU0WJapEuYj0AN4HXIHPjDFvZXp9EtDJ9rQMUMUYUz7D6+WAvcA8Y8wYR8aqlL24ubrQrm5l2tWtzPi+jVlzIJ5/LtzDgE82MCyyNuO6N8DbI/e9jIL2ABxFS5Q7pkR5buW2lLm9OGzOQkRcgQ+BnkAoMFhEQjO2McY8a4wJN8aEA/8B5mbazGvAGkfFqJSjubm6cGejqiz7y+081LYWn6//jZ7vr+XX3846O7QC0xLleZPbEuW51bdvX2bOnElSUhKHDx/m2LFjNwy52ZsjJ7hbA4eMMUeMMUnALODmoux/GgzMvP5ERFoCVYHsv5IoVUz4eLoxvl8YM4e3JdUYBn66gVe/38PVpJScVy6iMpYob9asGZGRkRw4cIALFy7Qu3dvmjVrRufOnW8oUf7mm2/me4I7Y4ny8PBw2rZtS+/evalZs2Z6ifJu3brdVKJ8zZo/v29eL1EeGRmJi4vLDSXKP/vsM9q2bcuxY8ccXqJ8+PDhtyxR/txzzxEcHMzFixcJCgri9ddfB2DevHnpE+PNmjXj7rvvplGjRvTq1YuPPvrIYWdCAVbXzBE/QH+soafrzx8CJmfTthZwEnC1PXcBVgM1gEeyWy/jT8uWLY1SxcHlhGTzj/m7TK3nF5nbJ6w0Gw//cVOb6OhoJ0RWfF26dMkYY0xSUpLp2bOnWbhwYfprffr0MYcPH76hnTHGvP766+a5554r3ECzMWHCBDN16lSH7yer3ysgyuTiM92RPYusZlqy69sNAuYYY66fazgKWGKMOZFNe2sHIiNEJEpEouLj4wsQqlKFx8fTjVf7hTFrRFuMgYGfbuRfC4t3L8PZXnnlFZo3b07Tpk1p0KBBliXKARYuXEh4eDhhYWFs2LCBF1980Vkh36BUlygXkduAfxljutuevwhgjPm/LNpuA0YbY36xPf8K6ACkAb6AB/CRMeaF7Pan
JcpVcXQ1KYUJP+xn6i9HqVmxDBP6N6VtSCUtUa4coiAlyh3Zs9gM1BOR2iLigdV7WJi5kYg0ACoA6eegGWMeNMbUNMYEA2OBL2+VKJQqrsp4uPGvvo2ZPaItIjDo0438c8FujLn12TtK5VVBf58cliyMMSnAGGAZ1umv3xhj9ojIeBHpm6HpYGCW0b8MVYq1CanE0mc6MKxdMF9uPMbW2MvEnjqtCUPZhTGGM2fO4OXlle9t6J3ylCpifv3tLOMX7KRffS8aV/XGz9sdFwdebKVKBy8vL4KCgnB3d79hud4pT6liqnXtinw7qgPvLNvPm3N+I7C8NxPua0pk3crODk2VYlpIUKkiyNvDlX/0CeWbJ27D3dWFBz7bxMvzd3E5Uc+YUs6hyUKpIqxVcEWWPN2Bx9vX5qtNx+k+aS3rD/3h7LBUKaTJQqkiztvDlZfvCmXOyNvwdHPhwc828dI87WWowqXJQqliomWtiix5pgPDO9Rm5q9WL2PdQe1lqMKhyUKpYsTL3ZW/9w5lzshIPN1dGDJlEy/O3cWlhGRnh6ZKOE0WShVDLWtVYMnTHXji9hBmbz5Oj/d+5ueDWvJGOY4mC6WKKS93V17s1Yg5T0bi5e7CQ1N+5cW5O7WXoRxCk4VSxVyLmhVY/HQHnrgjhNmbT9B90lrWHNBehrIvTRZKlQBe7q682LMR3z0ZSRlPN4Z+/ivPz9nJRe1lKDvRZKFUCdK8ZgUWPdWeJzvW4dstVi9j9f7Tzg5LlQCaLJQqYbzcXXm+R0PmjmqHr6cbj3yxmb/N2cGFa9rLUPmnyUKpEiq8Rnm+f6o9ozrWYc6WGLpPWssq7WWofNJkoVQJ5uXuyt96NGTeqHaU83Zj2BebGfet9jJU3mmyUKoUaGbrZYzuVIe522LpNmkNq/ZpL0PlniYLpUoJTzdXxnVvyLxRkZT39mDY1M389ZsdXLiqvQyVM00WSpUyTYPKs/CpdjzVuS7zt8fS7b01rNz3u7PDUkWcJgulSiFPN1f+2q0B80e1o0IZDx6dGsVz32zXXobKliYLpUqxJkF+LBzTnqc712XB9ji6TlrDimjtZaibabJQqpTzcHPhuW4NWDC6HRV9PHj8yyiem72d81eTnB2aKkI0WSilAAgLtPUy7qzHwh1xdJ20luXay1A2miyUUuk83Fx4rmt95o9uR2VfT4Z/GcVfZm3j3BXtZZR2miyUUjcJC/Rjweh2/KVLPRbtPEnXSWtZtueUs8NSTqTJQimVJQ83F/7SpT4Lx7SnSllPnpi+hadnai+jtNJkoZS6pdDq5Vgwph3Pda3P0t0n6TppDT/s1l5GaaPJQimVI3dXF2vie0x7qpbzYuSMLTw1cxtntZdRajg0WYhIDxHZLyKHROSFLF6fJCLbbT8HROS8bXm4iGwQkT0islNEBjoyTqVU7jQKKMf80e34a9f6/LD7JN0mreGH3SedHZYqBGKMccyGRVyBA0BXIAbYDAw2xkRn0/4poLkx5lERqQ8YY8xBEakObAEaGWPOZ7e/iIgIExUVZff3oZTK2r5TFxn77Q52x17krqYBvNq3MZV8PZ0dlsojEdlijInIqZ0jexatgUPGmCPGmCRgFtDvFu0HAzMBjDEHjDEHbY/jgNOAvwNjVUrlUcNq5Zg3qh1ju9Vn2Z5TdJu0liW7tJdRUjkyWQQCJzI8j7Etu4mI1AJqAyuzeK014AEcdkCMSqkCcHd1YUzneix6qgPVy3sz6qutjP5qK2cuJzo7NGVnjkwWksWy7Ma8BgFzjDGpN2xAJACYDgwzxqTdtAORESISJSJR8fHxBQ5YKZU/DaqVZd6oSMZ1b8Dy6N/pOmkti3dqL6MkcWSyiAFqZHgeBMRl03YQtiGo60SkHLAYeNkYszGrlYwxnxpjIowxEf7+OkqllDO5ubowulNdvn+qPUEVvBn99VZGfbWFP7SXUSI4MllsBuqJSG0R8cBKCAszNxKRBkAFYEOGZR7APOBLY8y3
DoxRKWVnDaqVZe6TkfytRwNWRJ+m26S1LNoZh6NOplGFw2HJwhiTAowBlgF7gW+MMXtEZLyI9M3QdDAwy9z4mzQAuB14JMOpteGOilUpZV9uri6M6liXxU+3p0YFb8Z8vY1RX20l/pL2Moorh506W9j01FmliqaU1DT+9/NvTFp+AB9PV17tF0afpgGIZDWtqQpbUTh1VimlcHN14cmOdVj8dHtqVvLh6ZnbeHKG9jKKG00WSqlCUa9qWb4beRsv9mzIyv2n6TppDQu2x+pcRjGhyUIpVWjcXF144o46LHm6PcGVfHhm1naemL6F05cSnB2ayoEmC6VUoatbpSzfPRnJS70asuZAPN0mrdVeRhGnyUIp5RSuLsKI2+uw5JkOhFS2ehkjpm/h9EXtZRRFmiyUUk5Vx9+Xb0dG8nLvRqw9EE/XSWtZuCO763eVs2iyUEo5nauL8HiHEJY+04E6/tYZU3+ft4uE5NScV1aFQpOFUqrICPH3ZfYTt/HEHSF8tek49338C8fOXHF2WApNFkqpIsbd1YUXezZiytAIYs5d464P1ukNloqAXCcLEWkvIsNsj/1FpLbjwlJKlXZ3NqrK4qfbE1LFl5EztjL++2iSUm4qPq0KSa6ShYj8E3geeNG2yB2Y4aiglFIKIKhCGb594jaGtQvm8/W/MeCTDcSev+bssEql3PYs7gH6Alcg/e51ZR0VlFJKXefh5sI/+zTm4wdbcPj0ZXp/8DMr9/3u7LBKndwmiyRbVVgDICI+jgtJKaVu1rNJAN8/1Z7qft48OjWKt3/YR0qqDksVltwmi29E5BOgvIgMB1YA/3NcWEopdbPgyj7MHRXJA21q8vHqwzzwv038rhfxFYpclygXka5AN6zbpS4zxix3ZGB5pSXKlSpd5m+L5aV5u/B2d+W9QeF0qKd3y8wPu5UoFxFXEVlhjFlujBlnjBlb1BKFUqr0ubt5IAvHtKOSrwcPf/4rk5YfIDVNa0s5So7JwhiTClwVEb9CiEcppXKtbpWyzB/djnuaB/L+Twd5+PNNep8MB8ntnEUCsEtEpojIB9d/HBmYUkrlRhkPNybe34wJ9zUl6ug5en/wM5uOnHF2WCVObpPFYuAVYC2wJcOPUko5nYgwoFVoEl5vAAAcpElEQVQN5o9uh4+nG4P/t5GPVh8iTYel7CYvE9weQH3b0/3GmGSHRZUPOsFdSJKuwoLRUKYiNB0EQRGg91JWRcilhGRenLuLRTtP0qmBP/8eEE4FHw9nh1Vk5XaCO1fJQkQ6AtOAo1hnQ9UAhhpj1hYsTPvRZFEIjIF5I2HnbHDzhJQEqFgHmg6EpgOgolaAUUWDMYYZG4/x2qK9VPb1YPKDLWhRs4KzwyqS7HY2lM1EoJsx5g5jzO1Ad2BSQQJUxdDmz2DnLOj4Iow9AH0nQ7nqsPpN+CAcpnSHqM/h2jlnR6pKORHhoduC+e7JSFxdhQH/3cCUdb/pnfgKILc9i53GmKY5LXMm7Vk42PGNMLU31O0Cg2aCS4bvGedPwK5vYMds+GM/uHpAvW7QbJD1r5un8+JWpd6Fa8mM+3YHP0b/TvfGVZnQvxl+3u7ODqvIsPcw1OdYpT6m2xY9CLgZY4YVKEo70mThQJdOwSd3gLs3jFgN3uWzbmcMnNxuJY3dc+BKPHiVh7B7rfmNGq11fkM5hTGGKet+462l+6he3puPHmxBWKBeDQD2TxaewGigPdacxVrgI2NMkTmhWZOFg6Qmw7Q+cHIHPL4CqjbO5XopcGQV7JgF+xZDyjWoUPvP+Y1KdRwbt1JZ2HLsHGO+3sqZy0n8o08oD7apiZTyLzD2ThY+QILtAj1ExBXwNMZcLXCkdqLJwkGWPg+b/gv3TYEm/fO3jYSLsPd7a77jt58BA0GtrMQRdp91ZpVSheTslSSe+2Y7q/fH07dZdd68twm+nm7ODstp7J0sNgJdjDGXbc99gR+NMZEFjtRONFk4wI7ZMG8EtB0FPf7PPtu8
EPvn/Eb8XnBxt81vDIT6PXR+QxWKtDTDx2sOM/HH/QRX8uGjIS1oWK2cs8NyCnufDeV1PVEA2B6XyUUQPURkv4gcEpEXsnh9kohst/0cEJHzGV4bKiIHbT9DcxmnspdTu+D7Z6BWO+g63n7b9QuE9s/CqA3wxFpo8wTERsE3D8O79ax9HttgzX8o5SAuLsLoTnX56vG2XEpM4e4P1/NN1Alnh1Wk5bZnsR54yhiz1fY8AviPMea2W6zjChwAugIxwGZgsDEmOpv2TwHNjTGPikhFIAqIwJpY3wK0NMZke06m9izs6No5+LQjpCRaH+i+VRy7v9QU+G211dvYtwiSr0L5WtYwVbNBOr+hHCr+UiLPzNrGL4fP0L9lEK/1C8Pbw9XZYRWa3PYscjtQ9wzwrYjEYX14VwcG5rBOa+CQMeaILaBZQD8gy2QBDAb+aXvcHVhujDlrW3c50AOYmct4VX6lpcF3w63homFLHJ8oAFzdrFNy63aBxMt/zm+sfQfWToDAiD/nN3wqOT4eVar4l/Vk+mNteP+ng/xn5UF2xVzgwwdbULeKr7NDK1JyOwxVG2gOPAksB/Zju2veLQQCGft1MbZlNxGRWrZ9rMzLuiIyQkSiRCQqPj4+F29D5WjNW3BoOfR8yzrVtbB5+kL4YHh4ATwXDV1fs64UXzoOJtaHrwfBnnmQrDe8Ufbj6iI817U+04a1Jv5yIv0mr2PB9lhnh1Wk5DZZvGKMuQiUxxpW+hT4OId1sjofLbsEMwiYc/1sq9yua4z51BgTYYyJ8PfXG58U2P4fYM3bEP4gRDzm7Gisq8PbPQ1ProeR66HtkxC3Db59BN6tDwufgqPrrd6QUnZwe31/ljzdgdDq5Xhm1nb+Pm8XCcmpOa9YCuQ2WVw/Wr2B/xpjFgA5VeaKwaohdV0QEJdN20HcOMSUl3WVPZw5DHNHQEAz6D2x6F08Vy0Mur1u9TYemgcNesKu72BqL3i/Gfz0Gvxx0NlRqhKgmp8XXw9vyxN3hPDVpuPc9/EvHDtzxdlhOV1uJ7gXAbFAF6AlcA341RjT7BbruGFNcN9pW3cz8IAxZk+mdg2AZUBtYwvGNsG9BWhha7YVa4L7bHb70wnuAki6Ap91gUsnYcQaqFDL2RHlTtIV64K/HbOsCwBNGlRvbl0tHnYf+GpvUxXMT3t/57lvdpCWZnjn/qb0CAtwdkh2Z+/rLMpgTTDvMsYcFJEAoIkx5scc1usFvAe4Ap8bY94QkfFAlDFmoa3Nv7BOzX0h07qPAi/Znr5hjPniVvvSZJFPxsB3j8HuuTDkO6h7p7Mjyp9Lp2DXHGti/NQuEFdrwrzZQGjQyypVolQ+nDh7lTFfb2VHzAUebVebF3o2xMMtt4MyRZ9dk0VxoMkinzZ8CMtegjv/AR3+6uxo7OP3aCtp7PwWLsWBZzkI7Wv1OGq1u7EIolK5kJSSxptL9jL1l6OE1yjPhw+2ILB8yfgCoslC5ezoOpjW1xr/Hzij6M1TFFRaqvUed86G6AWQdBnKBUHT+63EUaWhsyNUxcySXSf525yduLkK/x7QjM4Nqzo7pALTZKFu7UIsfHoHePnB8FXgVcJLHSRdhf1LrPmNwyvBpFqT+U0HWTWvCuN6ElUiHP3jCqO+2kr0yYs82bEOf+1aHzfX4ttb1WShspeSaN2b4vdoGL6y9H3DvnzaNr8x2yqpLq5Qp7N1tXiDXuCRYyUbVcolJKfy6vfRzPz1OK2DK/KfB5pTtZyXs8PKF00WKnuLnrXuaHf/NGh8t7Ojca7T+6yksfMbuBgDHmVt8xsDILgDuJSesg8q7+Zvi+WlebvwdnflvUHhdKhX/M7A02ShsrZtBiwYDe2esW+BwOIuLQ2OrbcmxqMXQuJFKFv9z/mNqqHOjlAVUYdOX2LUV1s5ePoyT3eux9N31sPVpfjM/2myUDeL22bdJ7tmGxgyz6rJpG6WfM02vzEbDq2w5jeqNbHNb9wPZYv/pKayr6tJ
Kbw8fzdzt8bSvm5l3hsUTmXf4lFuX5OFutGVM1YlWZMGT6wBn8rOjqh4uBwPe+ZaE+NxW0FcIKSTNb/RsDd4+Dg7QlVEGGP4NiqGVxbsxs/bnf8Mbk6bkKJf+FKThfpTWirMuBeO/QKP/gCBLZ0dUfEUf+DP+Y0Lx8Hd58/5jdp36PyGAmDvyYuM+morx89eZWy3BjxxewguRXhYSpOF+tOKV2Hdv6Hvf6DFw86OpvhLS4PjG6z5jT0LIPEClA2wTsFtOsiqY6VKtUsJybwwdxeLd56kUwN//j0gnAo+OZXTcw5NFsqy93uYPQRaDIW+Hzg7mpInOQEO/GD1OA7+CGkpUDXMuv9Gk/uhXMmrJaRyxxjDjI3HeG3RXir7ejD5wRa0qFnB2WHdRJOFsoZN/tcZ/OvDsKV6f2tHu3Lmz/mN2ChrfqP2Hdb8RhU7nk1l1yvt7bitohqXk+3//RKvL47mj8tJDO8Qwj3NqyP2fn/uXlAxJF+rarIo7RIvWYni6llrQtsvyNkRlS5/HLLNb8yG88ecHY0q6QIjYPhP+VrV3rdVVcWJMTB/FJw5ZN1xThNF4atcFzr/HTq9BLFbrKq4dmHHL3d2/aJYVOMqOgxWyfP52+Ko4OPB8A61qVnRTtUCvB0/vKXJoiRa/z7sXWjdkrT27c6OpnQTgaAcv7SpUkCALo2hQquzjPl6G7N/SOIffUJ5sE1NpBgU8Sy+1a9U1o6shp9ehdC7IfIpZ0ejlMqkZa2KLH66A7fVqcTL83fzzKztXE5McXZYOdJkUZKcPw7fDoPK9aHfhyWv5LhSJURFHw++eKQV47o3YNHOOPpOXse+UxedHdYtabIoKZITYPZD1qmbA2eAp6+zI1JK3YKLizC6U12+erwtlxJSuPvD9XwbdcLZYWVLk0VJYAws+atVbvue/0Lles6OSCmVS7fVqcSSpzvQomYFxs3Zybhvd3AtKdXZYd1Ek0VJsGWqVU22w1irXpFSqljxL+vJ9Mfa8HTnuszZGsPdH67ncPxlZ4d1A00WxV1MFCz9G9S50zpNUylVLLm6CM91a8DUYa2Jv5xI3/+sY8H2WGeHlU6TRXF2Od6apyhbDe77TAvZKVUC3FHfn8VPt6dRQDmembWdv8/bRUKy84elNFkUV6kpMGcYXDtrTWiXqejsiJRSdhLg583MEW154o4Qvtp0nPs+/oVjZ644NSZNFsXVin/C0Z/hrvcgoJmzo1FK2Zm7qwsv9mzEZw9HEHPuGnd9sI4fdp90WjyaLIqj3XNhw2RoNRzCBzs7GqWUA3UJrcqip9oT4u/DyBlbGf99NEkpaYUehyaL4ub0XlgwBmq0ge5vOjsapVQhqFGxDN+OjOSRyGA+X/8bAz7ZQOz5a4UagyaL4iThAsx60LqV5/3TwK1o3kxFKWV/Hm4u/KtvYz56sAWHTl+m9wc/s3Lf74W2f4cmCxHpISL7ReSQiLyQTZsBIhItIntE5OsMyyfYlu0VkQ+kOFTacqS0NJg30ip3PWCa3lRHqVKqV5MAFj3Vnup+3jw6NYq3f9hHSqrjh6UclixExBX4EOgJhAKDRSQ0U5t6wItAO2NMY+AvtuWRQDugKRAGtALucFSsxcK6ibB/CXR7A2pFOjsapZQTBVf2Ye6oSAa3rsnHqw/z8Oe/kpbm2NLujixR3ho4ZIw5AiAis4B+QHSGNsOBD40x5wCMMadtyw3gBXhgVfZ1Bwqvv1XUHFwBK9+wbtPZ5glnR6OUKgK83F35v3ub0Lp2BS5cTcbFxbGDL45MFoFAxqpYMUCbTG3qA4jIesAV+Jcx5gdjzAYRWQWcxEoWk40xex0Ya9F17ih89xhUbQx93tdKskqpG9zTvHBububIZJHVp1rmfpIbUA/oCAQBP4tIGFAZaGRbBrBcRG43xqy9YQciI4ARADVr1rRf5EVF0lWYPQQwMHC6NbGtlFJO4MgJ7higRobnQUBcFm0WGGOSjTG/Afuxksc9wEZjzGVj
zGVgKdA28w6MMZ8aYyKMMRH+/v4OeRNOYwwsehZO7YZ7P8v3zdiVUsoeHJksNgP1RKS2iHgAg4CFmdrMBzoBiEhlrGGpI8Bx4A4RcRMRd6zJ7dI1DLX5M9g5Czq+APW7OTsapVQp57BkYYxJAcYAy7A+6L8xxuwRkfEi0tfWbBlwRkSigVXAOGPMGWAOcBjYBewAdhhjvndUrEXO8Y3wwwtQvwfc/jdnR6OUUogxjj3dqrBERESYqKgoZ4dRcJdOwSd3gLs3jFgN3uWdHZFSqgQTkS3GmIic2jlyglvlVWoyfPsIJF6EId9polBKFRmaLIqSH1+G4xvgvilQLczZ0SilVDqtDVVU7JgNm/4LbUdBk/7OjkYppW6gyaIoOLULvn8GarWDruOdHY1SSt1Ek4WzXTtnXXjnXR76fwGu7s6OSCmlbqJzFs6UlgbfDYcLsTBsCZSt6uyIlFIqS5osnGnNW3BoOfSeCDVaOzsapZTKlg5DOcv+H2DN2xD+IEQ85uxolFLqljRZOMOZwzB3BAQ0s3oVWklWKVXEabIobElXrAltFxcYMN26UlsppYo4nbMoTMbAwqfg9F7rCu0KtZwdkVJK5Yr2LArTxo9h93dw5ytQ905nR6OUUrmmyaKwHF1nlfNoeBe0f87Z0SilVJ5osigMF2KtAoEVa8PdH+uEtlKq2NE5C0dLSYRvh1q3SB26CLzKOTsipZTKM00WjvbDCxCzGe6fBlUaOjsapZTKFx2GcqRtMyDqc4h8Ghrf7exolFIq3zRZOErcNlj0HNS+He78p7OjUUqpAtFk4QhXzsDsh8HH31ZJVkf7lFLFm36K2VtaKnz3KFw+BY/+AD6VnR2RUkoVmCYLe1v5OhxZDX0+gMCWzo5GKaXsQoeh7Gnv97Du39BiKLQc6uxolFLKbjRZ2Ev8AZj3pNWb6PWOs6NRSim70mRhD4mXYPaD4OYJA760/lVKqRJE5ywKyhiYPwrOHIKHF4BfkLMjUkopu9NkUVDr34e9C6Hra9Y1FUopVQI5dBhKRHqIyH4ROSQiL2TTZoCIRIvIHhH5OsPymiLyo4jstb0e7MhY8+XIavjpVQi9GyKfcnY0SinlMA7rWYiIK/Ah0BWIATaLyEJjTHSGNvWAF4F2xphzIlIlwya+BN4wxiwXEV8gzVGx5sv5EzDnUahcH/p9qJVklVIlmiN7Fq2BQ8aYI8aYJGAW0C9Tm+HAh8aYcwDGmNMAIhIKuBljltuWXzbGXHVgrHmTnADfPASpyTBwBnj6OjsipZRyKEcmi0DgRIbnMbZlGdUH6ovIehHZKCI9Miw/LyJzRWSbiLxj66k4nzGw5K9W7ad7/guV6zk7IqWUcjhHJousxmVMpuduQD2gIzAY+ExEytuWdwDGAq2AEOCRm3YgMkJEokQkKj4+3n6R38qWqVY12Q5joWHvwtmnUko5mSOTRQxQI8PzICAuizYLjDHJxpjfgP1YySMG2GYbwkoB5gMtMu/AGPOpMSbCGBPh7+/vkDdxY7RRsPRvUOdO6PSS4/enlFJFhCOTxWagnojUFhEPYBCwMFOb+UAnABGpjDX8dMS2bgURuZ4BOgPRONPleJj9EJStBvd9Bi5FY1RMKaUKg8OSha1HMAZYBuwFvjHG7BGR8SLS19ZsGXBGRKKBVcA4Y8wZY0wq1hDUTyKyC2tI63+OijVHqSkwZxhcO2tNaJep6LRQlFLKGcSYzNMIxVNERISJiopyzMaX/R02TIa7P4bwBxyzD6WUcgIR2WKMicipndaGysnuuVaiaPW4JgqlVKmlyeJWTu+FBWMgqDV0/z9nR6OUUk6jySI7CRdg1oPg4WOrJOvh7IiUUspptJBgVtLSYN5IOH8Mhn4P5QKcHZFSSjmVJousrJsI+5dAj7ehVqSzo1FKKafTYajMDq6AlW9Ak/uhzRPOjkYppYoETRYZnTsK3z0GVRtDn/e1kqxSStlosrgu6SrMHgIYGDjdmthWSikF6JyF
xRhY9Cyc2g0PfAMVQ5wdkVJKFSmlPlmkpqUS+8u/IXoOtBsN1RrCxePODqvIc3Nxw8PVA09XTzxcPfBw8UB02E6pEqvUJ4sLcVvpffhLqFEd4hbCvMy1DlVuebhYycPd1R1PV8/0ROLp6om7y43LMiaa3Lx+PSHdtMz2+Po2NGEp5RilPln4VGvCm0G9ILAFuJdxdjjFRkpaCkmpSSSmJpKUZvs3NSl9WWJqIsmpydbjNOu1qylXOZ94Pv31pNQkktL+XCfNFPzOubdKKDklnNy87unqiYdLpiSVITlqwlIlValPFp5uXvS5821nh1HqGWNIMSl/JpiMyceWbDInpBuWpWWRpLLYxpWUK5xLPHfTNpLTkklIScDcdH+uvMucUDxcPXAtIjd6VCVT/Qr1mXDHBIfuo9QnC1U0iAju4o67iztlnNTDu56wskxIadkkqYw9qbQskpTtuT16TUplJ7Bs5jtW258mC6VsMiYsH3c9dVqpjPQ6C6WUUjnSZKGUUipHmiyUUkrlSJOFUkqpHGmyUEoplSNNFkoppXKkyUIppVSONFkopZTKkRhT8PIGRYGIxAPHCrCJysAfdgrHnjSuvNG48kbjypuSGFctY4x/To1KTLIoKBGJMsZEODuOzDSuvNG48kbjypvSHJcOQymllMqRJgullFI50mTxp0+dHUA2NK680bjyRuPKm1Ibl85ZKKWUypH2LJRSSuWo1CYLEblfRPaISJqIZHsWgYj0EJH9InJIRF4ohLgqishyETlo+7dCNu1SRWS77cdhNw7P6f2LiKeIzLa9vklEgh0VSx5iekRE4jMcn8cdHZNtv5+LyGkR2Z3N6yIiH9ji3ikiLYpIXB1F5EKG4/WPQoqrhoisEpG9tr/FZ7JoU+jHLJdxFfoxExEvEflVRHbY4no1izaO+3s0xpTKH6AR0ABYDURk08YVOAyEAB7ADiDUwXFNAF6wPX4BeDubdpcL4Rjl+P6BUcB/bY8HAbOLQEyPAJOd8Dt1O9AC2J3N672ApYAAbYFNRSSujsAiJxyvAKCF7XFZ4EAW/5eFfsxyGVehHzPbMfC1PXYHNgFtM7Vx2N9jqe1ZGGP2GmP259CsNXDIGHPEGJMEzAL6OTi0fsA02+NpwN0O3t+t5Ob9Z4x3DnCniIiTY3IKY8xa4OwtmvQDvjSWjUB5EQkoAnE5hTHmpDFmq+3xJWAvkPn+oIV+zHIZV6GzHYPLtqfutp/Mk84O+3sstckilwKBExmex+D4X5qqxpiTYP3SAlWyaeclIlEislFEHJVQcvP+09sYY1KAC0AlB8WT25gA7rMNW8wRkRoOjCcvnPH7lFu32YY3lopI48LeuW24pDnWt+WMnHrMbhEXOOGYiYiriGwHTgPLjTHZHi97/z2W6Htwi8gKoFoWL/3dGLMgN5vIYlmBTx+7VVx52ExNY0yciIQAK0VklzHmcEFjyyQ3798hx+gWcrO/74GZxphEERmJ9U2rswNjyq3CPla5tRWr5MNlEekFzAfqFdbORcQX+A74izHmYuaXs1ilUI5ZDnE55ZgZY1KBcBEpD8wTkTBjTMa5KIcdrxKdLIwxXQq4iRgg47fSICCugNu8ZVwi8ruIBBhjTtq626ez2Uac7d8jIrIa69uPvZNFbt7/9TYxIuIG+OHYIY8cYzLGnMnw9H/A2w6MJy8c8vtUUBk/CI0xS0TkIxGpbIxxeA0kEXHH+kD+yhgzN4smTjlmOcXlzGNm2+d52999DyBjsnDY36MOQ93aZqCeiNQWEQ+sCSOHnXlksxAYans8FLipByQiFUTE0/a4MtAOiHZALLl5/xnj7Q+sNLbZNQfJMaZMY9p9scaci4KFwMO2M3zaAheuDzk6k4hUuz6uLSKtsT4Xztx6LbvsV4ApwF5jzL+zaVboxyw3cTnjmImIv61HgYh4A12AfZmaOe7vsTBn84vSD3APVhZOBH4HltmWVweWZGjXC+tsiMNYw1eOjqsS8BNw0PZv
RdvyCOAz2+NIYBfWmUC7gMccGM9N7x8YD/S1PfYCvgUOAb8CIYVwjHKK6f+APbbjswpoWEi/UzOBk0Cy7XfrMWAkMNL2ugAf2uLeRTZn4TkhrjEZjtdGILKQ4mqPNUSyE9hu++nl7GOWy7gK/ZgBTYFttrh2A/+wLS+Uv0e9glsppVSOdBhKKaVUjjRZKKWUypEmC6WUUjnSZKGUUipHmiyUUkrlSJOFUnkgIpdzbnXL9efYrrpHRHxF5BMROWyrIrpWRNqIiIftcYm+aFYVL5oslCoktvpBrsaYI7ZFn2FdXVvPGNMYq1puZWMVSPwJGOiUQJXKgiYLpfLBdkXxOyKyW0R2ichA23IXW+mHPSKySESWiEh/22oPYrsiX0TqAG2Al40xaWCVbjHGLLa1nW9rr1SRoN1cpfLnXiAcaAZUBjaLyFqs0ivBQBOsisF7gc9t67TDupoaoDGw3ViF4bKyG2jlkMiVygftWSiVP+2xKtumGmN+B9Zgfbi3B741xqQZY05hlRu5LgCIz83GbUkkSUTK2jlupfJFk4VS+ZPdDWVudaOZa1i1e8CqK9RMRG71N+gJJOQjNqXsTpOFUvmzFhhouxmNP9atS38F1mHdeMlFRKpi3X7zur1AXQBj3XskCng1Q/XSeiLSz/a4EhBvjEkurDek1K1oslAqf+ZhVf/cAawE/mYbdvoOq7LrbuATrDusXbCts5gbk8fjWDfBOiQiu7DuvXH9Xg2dgCWOfQtK5Z5WnVXKzkTE11h3UKuE1dtoZ4w5ZbsHwSrb8+wmtq9vYy7wosn5PvFKFQo9G0op+1tku0mNB/CarceBMeaaiPwT6z7Jx7Nb2XZTp/maKFRRoj0LpZRSOdI5C6WUUjnSZKGUUipHmiyUUkrlSJOFUkqpHGmyUEoplSNNFkoppXL0/yEJPYEuVlNGAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Restate the grids that were used to fill score_s, then make sure the scores\n",
    "# are a plain (len(C_s), len(gamma_s)) ndarray: rows = C, columns = gamma.\n",
    "C_s = np.logspace(-1, 3, 5)  # logspace(a, b, N): N log-spaced values from 10**a to 10**b\n",
    "gamma_s = np.logspace(-1, 1, 3)\n",
    "score_s = np.array(score_s).reshape(len(C_s), len(gamma_s))\n",
    "\n",
    "x_axis = np.log10(C_s)\n",
    "for j, gamma in enumerate(gamma_s):\n",
    "    # The plotted values are 5-fold cross-validation means computed on the\n",
    "    # training data, so the curves are labelled as CV scores, not test scores.\n",
    "    plt.plot(x_axis, score_s[:, j], label='CV - log10(gamma) = ' + str(np.log10(gamma)))\n",
    "\n",
    "plt.legend()\n",
    "plt.xlabel('log(C)')\n",
    "plt.ylabel('score')\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
